Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 183
0.00% covered (danger)
0.00%
0 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
MoveToExternal
0.00% covered (danger)
0.00%
0 / 183
0.00% covered (danger)
0.00%
0 / 8
2256
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 31
0.00% covered (danger)
0.00%
0 / 1
42
 doMoveToExternal
0.00% covered (danger)
0.00%
0 / 86
0.00% covered (danger)
0.00%
0 / 1
420
 compress
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
12
 resolveLegacyEncoding
0.00% covered (danger)
0.00%
0 / 19
0.00% covered (danger)
0.00%
0 / 1
72
 resolveStubs
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
56
 getConditions
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 resolveText
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2/**
3 * Move text from the text table to external storage
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance ExternalStorage
22 */
23
24use MediaWiki\MainConfigNames;
25use MediaWiki\Maintenance\UndoLog;
26use MediaWiki\Storage\SqlBlobStore;
27use Wikimedia\AtEase\AtEase;
28use Wikimedia\Rdbms\IExpression;
29use Wikimedia\Rdbms\LikeValue;
30
31// @codeCoverageIgnoreStart
32require_once __DIR__ . '/../Maintenance.php';
33// @codeCoverageIgnoreEnd
34
/**
 * Maintenance script that moves text rows from the text table to external storage.
 *
 * Rows in the requested old_id range whose old_flags do not contain "external"
 * are read in batches, optionally converted from the wiki's legacy encoding,
 * optionally compressed with gzdeflate(), written to the external store named
 * on the command line, and finally updated (through an UndoLog) so that
 * old_text holds the URL returned by the store and old_flags includes
 * "external".
 *
 * @ingroup Maintenance ExternalStorage
 */
class MoveToExternal extends Maintenance {
    /** @var ResolveStubs Helper used to resolve HistoryBlobStub rows after the main pass */
    private $resolveStubs;
    /** @var int Progress is reported every this many blocks (main pass) or stubs (stub pass); 0 disables */
    private $reportingInterval;
    /** @var int First old_id to process (inclusive) */
    private $minID;
    /** @var int Last old_id to process (inclusive) */
    private $maxID;
    /** @var string External store type, first positional argument */
    private $esType;
    /** @var string External store location, second positional argument */
    private $esLocation;
    /** @var int Plain text rows shorter than this many bytes are not moved */
    private $threshold;
    /** @var bool Whether to compress text with gzdeflate() before storing it */
    private $gzip;
    /** @var bool Whether HistoryBlobStub rows are left alone instead of being resolved */
    private $skipResolve;
    /** @var string|null Legacy encoding to convert from, or null when no conversion is done */
    private $legacyEncoding;
    /** @var bool Whether to only report what would be moved */
    private $dryRun;
    /** @var UndoLog Log through which all text table updates are made */
    private $undoLog;

    /**
     * Declare the command line options and arguments of the script.
     */
    public function __construct() {
        parent::__construct();

        $this->setBatchSize( 1000 );

        $this->addOption( 'start', 'start old_id', false, true, 's' );
        $this->addOption( 'end', 'end old_id', false, true, 'e' );
        $this->addOption( 'threshold', 'minimum size in bytes', false, true );
        $this->addOption( 'reporting-interval',
            'show a message after this many revisions', false, true );
        $this->addOption( 'undo', 'filename for undo SQL', false, true );

        $this->addOption( 'skip-gzip', 'Don\'t compress individual revisions' );
        $this->addOption( 'skip-resolve',
            'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
        $this->addOption( 'iconv', 'Resolve legacy character encoding' );
        $this->addOption( 'dry-run', 'Don\'t modify any rows' );

        $this->addArg( 'type', 'The external store type, e.g. "DB" or "mwstore"' );
        $this->addArg( 'location', 'e.g. "cluster12" or "global-swift"' );
    }

    /**
     * Read the command line options into the object's state, open the undo
     * log and run the move.
     *
     * @return bool The result of doMoveToExternal()
     */
    public function execute() {
        $this->resolveStubs = new ResolveStubs;
        $this->esType = $this->getArg( 0 ); // e.g. "DB" or "mwstore"
        $this->esLocation = $this->getArg( 1 ); // e.g. "cluster12" or "global-swift"
        $dbw = $this->getPrimaryDB();

        // Without --end, process up to the highest old_id currently in the table
        $maxID = $this->getOption( 'end' ) ?? $dbw->newSelectQueryBuilder()
            ->select( 'MAX(old_id)' )
            ->from( 'text' )
            ->caller( __METHOD__ )->fetchField();
        $this->maxID = (int)$maxID;
        $this->minID = (int)$this->getOption( 'start', 1 );

        // Cast so that the modulo operations on the interval always see an integer;
        // a non-numeric value becomes 0, which disables progress reporting
        $this->reportingInterval = (int)$this->getOption( 'reporting-interval', 100 );
        $this->threshold = (int)$this->getOption( 'threshold', 0 );

        if ( $this->getOption( 'skip-gzip' ) ) {
            $this->gzip = false;
        } elseif ( !function_exists( 'gzdeflate' ) ) {
            $this->fatalError( "gzdeflate() not found. " .
                "Please run with --skip-gzip if you don't want to compress revisions." );
        } else {
            $this->gzip = true;
        }

        $this->skipResolve = $this->getOption( 'skip-resolve' );

        if ( $this->getOption( 'iconv' ) ) {
            $legacyEncoding = $this->getConfig()->get( MainConfigNames::LegacyEncoding );
            if ( $legacyEncoding ) {
                $this->legacyEncoding = $legacyEncoding;
            } else {
                $this->output( "iconv requested but the wiki has no legacy encoding\n" );
            }
        }
        $this->dryRun = $this->getOption( 'dry-run', false );

        $undo = $this->getOption( 'undo' );
        try {
            $this->undoLog = new UndoLog( $undo, $dbw );
        } catch ( RuntimeException $e ) {
            $this->fatalError( "Unable to open undo log" );
        }
        $this->resolveStubs->setUndoLog( $this->undoLog );

        return $this->doMoveToExternal();
    }

    /**
     * Walk the old_id range in blocks of the batch size and move every
     * eligible row to external storage.
     *
     * HistoryBlobStub rows are collected and handed to resolveStubs() after
     * the main pass, unless stub resolution was disabled.
     *
     * @return bool False if any row could not be processed, true otherwise
     */
    private function doMoveToExternal() {
        $success = true;
        $dbr = $this->getReplicaDB();

        $count = $this->maxID - $this->minID + 1;
        $blockSize = $this->getBatchSize();
        $numBlocks = ceil( $count / $blockSize );
        print "Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";

        $esFactory = $this->getServiceContainer()->getExternalStoreFactory();
        $extStore = $esFactory->getStore( $this->esType );
        $numMoved = 0;
        $stubIDs = [];

        for ( $block = 0; $block < $numBlocks; $block++ ) {
            $blockStart = $block * $blockSize + $this->minID;
            // Clamp the final block so that rows past the requested end ID are not touched
            $blockEnd = min( $blockStart + $blockSize - 1, $this->maxID );

            if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
                $this->output( "oldid=$blockStart, moved=$numMoved\n" );
                $this->waitForReplication();
            }

            $res = $dbr->newSelectQueryBuilder()
                ->select( [ 'old_id', 'old_flags', 'old_text' ] )
                ->from( 'text' )
                ->where( $this->getConditions( $blockStart, $blockEnd, $dbr ) )
                ->caller( __METHOD__ )->fetchResultSet();
            foreach ( $res as $row ) {
                $text = $row->old_text;
                $id = $row->old_id;
                $flags = SqlBlobStore::explodeFlags( $row->old_flags );
                [ $text, $flags ] = $this->resolveText( $text, $flags );

                if ( $text === false ) {
                    // There is no text to move. Carrying on would unserialize,
                    // compress or store the boolean false in place of the blob.
                    $success = false;
                    continue;
                }

                if ( in_array( 'error', $flags ) ) {
                    continue;
                } elseif ( in_array( 'object', $flags ) ) {
                    $obj = unserialize( $text );
                    if ( $obj instanceof HistoryBlobStub ) {
                        // Handle later, after CGZ resolution
                        if ( !$this->skipResolve ) {
                            $stubIDs[] = $id;
                        }
                        continue;
                    } elseif ( $obj instanceof HistoryBlobCurStub ) {
                        // Copy cur text to ES
                        $newText = $obj->getText();
                        if ( $newText === false ) {
                            print "Warning: Could not fetch revision blob {$id}{$text}\n";
                            $success = false;
                            continue;
                        }

                        [ $text, $flags ] = $this->resolveLegacyEncoding( $newText, [] );

                        if ( $text === false ) {
                            print "Warning: Could not decode legacy-encoded gzip'ed revision blob {$id}{$newText}\n";
                            $success = false;
                            continue;
                        }

                        [ $text, $flags ] = $this->compress( $text, $flags );
                    } elseif ( $obj instanceof ConcatenatedGzipHistoryBlob ) {
                        // Store as is
                    } else {
                        // unserialize() yields a non-object (e.g. false) for corrupt
                        // data, and get_class() rejects non-objects
                        $className = is_object( $obj ) ? get_class( $obj ) : gettype( $obj );
                        print "Warning: old_id=$id unrecognised object class \"$className\"\n";
                        $success = false;
                        continue;
                    }
                } elseif ( strlen( $text ) < $this->threshold ) {
                    // Don't move small revisions
                    continue;
                } else {
                    [ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
                    if ( $text === false ) {
                        // Must be caught here: gzdeflate( false ) would happily
                        // compress an empty string, which would then replace the row
                        print "Warning: Could not decode legacy-encoded revision blob {$id}\n";
                        $success = false;
                        continue;
                    }
                    [ $newText, $flags ] = $this->compress( $text, $flags );
                    if ( $newText === false ) {
                        print "Warning: Could not compress revision blob {$id}{$text}\n";
                        $success = false;
                        continue;
                    }
                    $text = $newText;
                }
                $flags[] = 'external';
                $flagsString = implode( ',', $flags );

                if ( $this->dryRun ) {
                    // Show the flags and an escaped 30 byte preview instead of writing anything
                    $this->output( "Move $id => $flagsString " .
                        addcslashes( substr( $text, 0, 30 ), "\0..\x1f\x7f..\xff" ) .
                        "\n"
                    );
                    continue;
                }

                $url = $extStore->store( $this->esLocation, $text );
                if ( !$url ) {
                    $this->fatalError( "Error writing to external storage" );
                }
                // The original row is passed along with the new values
                $moved = $this->undoLog->update(
                    'text',
                    [ 'old_flags' => $flagsString, 'old_text' => $url ],
                    (array)$row,
                    __METHOD__
                );
                if ( $moved ) {
                    $numMoved++;
                } else {
                    print "Update of old_id $id failed, affected zero rows\n";
                    $success = false;
                }
            }
        }

        if ( count( $stubIDs ) ) {
            $this->resolveStubs( $stubIDs );
        }

        return $success;
    }

    /**
     * Compress text with gzdeflate() if compression is enabled and the text
     * is not already flagged as gzip.
     *
     * @param string $text
     * @param string[] $flags
     * @return array Two elements: the gzdeflate() result (or the unchanged
     *   text) and the flags, with "gzip" appended if compression was applied
     */
    private function compress( $text, $flags ) {
        if ( $this->gzip && !in_array( 'gzip', $flags ) ) {
            $flags[] = 'gzip';
            $text = gzdeflate( $text );
        }
        return [ $text, $flags ];
    }

    /**
     * Convert text from the legacy encoding to UTF-8 if a legacy encoding is
     * set and the flags do not already mark the text as UTF-8.
     *
     * Text flagged as gzip is inflated first and loses its "gzip" flag. When
     * compression is disabled, gzip text is returned untouched instead.
     *
     * @param string $text
     * @param string[] $flags
     * @return array Two elements: the converted text, or false if inflating
     *   or converting failed, and the updated flags
     */
    private function resolveLegacyEncoding( $text, $flags ) {
        if ( $this->legacyEncoding !== null
            && !in_array( 'utf-8', $flags )
            && !in_array( 'utf8', $flags )
        ) {
            // First decompress the entry so we don't try to convert a binary gzip to utf-8
            if ( in_array( 'gzip', $flags ) ) {
                if ( !$this->gzip ) {
                    return [ $text, $flags ];
                }
                $flags = array_diff( $flags, [ 'gzip' ] );
                $newText = gzinflate( $text );
                if ( $newText === false ) {
                    return [ false, $flags ];
                }
                $text = $newText;
            }
            AtEase::suppressWarnings();
            $newText = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $text );
            AtEase::restoreWarnings();
            if ( $newText === false ) {
                return [ false, $flags ];
            }
            $text = $newText;
            $flags[] = 'utf-8';
        }
        return [ $text, $flags ];
    }

    /**
     * Re-read the collected stub rows in batches and pass each one to the
     * ResolveStubs helper, reporting progress along the way.
     *
     * @param int[] $stubIDs old_id values of the rows holding HistoryBlobStub objects
     */
    private function resolveStubs( $stubIDs ) {
        if ( $this->dryRun ) {
            print "Note: resolving stubs in dry run mode is expected to fail, " .
                "because the main blobs have not been moved to external storage.\n";
        }

        $dbr = $this->getReplicaDB();
        $this->output( "Resolving " . count( $stubIDs ) . " stubs\n" );
        $numResolved = 0;
        $numTotal = 0;
        foreach ( array_chunk( $stubIDs, $this->getBatchSize() ) as $stubBatch ) {
            $res = $dbr->newSelectQueryBuilder()
                ->select( [ 'old_id', 'old_flags', 'old_text' ] )
                ->from( 'text' )
                ->where( [ 'old_id' => $stubBatch ] )
                ->caller( __METHOD__ )->fetchResultSet();
            foreach ( $res as $row ) {
                $numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
                $numTotal++;
                if ( $this->reportingInterval
                    && $numTotal % $this->reportingInterval == 0
                ) {
                    $this->output( "$numTotal stubs processed\n" );
                    $this->waitForReplication();
                }
            }
        }
        $this->output( "$numResolved of $numTotal stubs resolved\n" );
    }

    /**
     * Build the query conditions selecting the rows of one block: old_id
     * within [$blockStart, $blockEnd] and old_flags not containing "external".
     *
     * @param int $blockStart First old_id of the block (inclusive)
     * @param int $blockEnd Last old_id of the block (inclusive)
     * @param mixed $dbr Database handle used to build the expressions
     * @return array Conditions for the select query builder's where()
     */
    protected function getConditions( $blockStart, $blockEnd, $dbr ) {
        return [
            $dbr->expr( 'old_id', '>=', $blockStart ),
            $dbr->expr( 'old_id', '<=', $blockEnd ),
            $dbr->expr( 'old_flags', IExpression::NOT_LIKE,
                new LikeValue( $dbr->anyString(), 'external', $dbr->anyString() ) ),
        ];
    }

    /**
     * Pre-process the text and flags of a row before it is examined.
     *
     * This implementation returns both unchanged; being protected, it can be
     * overridden by a subclass.
     *
     * @param string $text
     * @param string[] $flags
     * @return array Two elements: the text and the flags
     */
    protected function resolveText( $text, $flags ) {
        return [ $text, $flags ];
    }
}
335
336// @codeCoverageIgnoreStart
337$maintClass = MoveToExternal::class;
338require_once RUN_MAINTENANCE_IF_MAIN;
339// @codeCoverageIgnoreEnd