Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 193 |
|
0.00% |
0 / 17 |
CRAP | |
0.00% |
0 / 1 |
BackupDumper | |
0.00% |
0 / 191 |
|
0.00% |
0 / 17 |
3660 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
6 | |||
finalSetup | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
registerOutput | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
registerFilter | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
loadPlugin | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
processOptions | |
0.00% |
0 / 58 |
|
0.00% |
0 / 1 |
420 | |||
dump | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
156 | |||
initProgress | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
backupDb | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
setDB | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
__destruct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
backupServer | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
reportPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
revCount | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
report | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
showReport | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
20 | |||
progress | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * Base classes for database-dumping maintenance scripts. |
4 | * |
5 | * Copyright © 2005 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | * @ingroup Dump |
25 | * @ingroup Maintenance |
26 | */ |
27 | |
28 | require_once __DIR__ . '/../Maintenance.php'; |
29 | require_once __DIR__ . '/../../includes/export/WikiExporter.php'; |
30 | |
31 | use MediaWiki\MainConfigNames; |
32 | use MediaWiki\Settings\SettingsBuilder; |
33 | use MediaWiki\WikiMap\WikiMap; |
34 | use Wikimedia\Rdbms\IMaintainableDatabase; |
35 | use Wikimedia\Rdbms\LoadBalancer; |
36 | |
/**
 * Base class for maintenance scripts that write XML dumps.
 *
 * It registers the built-in output and filter plugins, turns the ordered
 * --output/--filter/--plugin options into a chain of sinks, drives a
 * WikiExporter through an ExportProgressFilter, and writes periodic progress
 * lines to stderr.
 *
 * @ingroup Dump
 * @ingroup Maintenance
 */
abstract class BackupDumper extends Maintenance {
	/** @var bool Whether progress lines are written at all */
	public $reporting = true;
	/** @var string[]|null null means all pages */
	public $pages = null;
	/** @var bool don't output <mediawiki> and <siteinfo> */
	public $skipHeader = false;
	/** @var bool don't output </mediawiki> */
	public $skipFooter = false;
	/** @var int */
	public $startId = 0;
	/** @var int */
	public $endId = 0;
	/** @var int */
	public $revStartId = 0;
	/** @var int */
	public $revEndId = 0;
	/** @var bool */
	public $dumpUploads = false;
	/** @var bool */
	public $dumpUploadFileContents = false;
	/** @var bool */
	public $orderRevs = false;
	/** @var array|null */
	public $limitNamespaces = [];
	/** @var resource|false */
	public $stderr;

	/** @var int Number of revisions between two progress reports */
	protected $reportingInterval = 100;
	/** @var int */
	protected $pageCount = 0;
	/** @var int */
	protected $revCount = 0;
	/** @var string|null null means use default */
	protected $schemaVersion = null;
	/** @var string|null null means use default */
	protected $server = null;
	/** @var DumpMultiWriter|DumpOutput|null Output filters */
	protected $sink = null;
	/** @var float */
	protected $lastTime = 0;
	/** @var int */
	protected $pageCountLast = 0;
	/** @var int */
	protected $revCountLast = 0;

	/** @var string[] Output sink class names, keyed by type name */
	protected $outputTypes = [];
	/** @var string[] Filter class names, keyed by type name */
	protected $filterTypes = [];

	/** @var int */
	protected $ID = 0;

	/** @var float */
	protected $startTime;
	/** @var int */
	protected $pageCountPart;
	/** @var int */
	protected $revCountPart;
	/** @var int */
	protected $maxCount;
	/** @var float */
	protected $timeOfCheckpoint;
	/** @var ExportProgressFilter */
	protected $egress;
	/** @var string */
	protected $buffer;
	/** @var array|false */
	protected $openElement;
	/** @var bool */
	protected $atStart;
	/** @var string|null */
	protected $thisRevModel;
	/** @var string|null */
	protected $thisRevFormat;
	/** @var string */
	protected $lastName;
	/** @var string */
	protected $state;

	/**
	 * The dependency-injected database to use.
	 *
	 * @var IMaintainableDatabase|null
	 *
	 * @see self::setDB
	 */
	protected $forcedDb = null;

	/** @var LoadBalancer */
	protected $lb;

	/**
	 * Open stderr, register the built-in plugins and declare the common
	 * command line options.
	 *
	 * @param array|null $args For backward compatibility
	 */
	public function __construct( $args = null ) {
		parent::__construct();
		$this->stderr = fopen( "php://stderr", "wt" );

		// Built-in output and filter plugins
		$this->registerOutput( 'file', DumpFileOutput::class );
		$this->registerOutput( 'gzip', DumpGZipOutput::class );
		$this->registerOutput( 'bzip2', DumpBZip2Output::class );
		$this->registerOutput( 'dbzip2', DumpDBZip2Output::class );
		$this->registerOutput( 'lbzip2', DumpLBZip2Output::class );
		$this->registerOutput( '7zip', Dump7ZipOutput::class );

		$this->registerFilter( 'latest', DumpLatestFilter::class );
		$this->registerFilter( 'notalk', DumpNotalkFilter::class );
		$this->registerFilter( 'namespace', DumpNamespaceFilter::class );

		// These three can be specified multiple times
		$this->addOption( 'plugin', 'Load a dump plugin class. Specify as <class>[:<file>].',
			false, true, false, true );
		$this->addOption( 'output', 'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2, lbzip2', false, true, 'o', true );
		$this->addOption( 'filter', 'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace', false, true, false, true );
		$this->addOption( 'report', 'Report position and speed after every n pages processed. ' .
			'Default: 100.', false, true );
		$this->addOption( 'server', 'Force reading from MySQL server', false, true );
		$this->addOption( '7ziplevel', '7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.', false, true );
		// NOTE: we can't know the default schema version yet, since configuration has not been
		//       loaded when this constructor is called. To work around this, we re-declare
		//       this option in finalSetup().
		$this->addOption( 'schema-version', 'Schema version to use for output.', false, true );

		if ( $args ) {
			// Args should be loaded and processed so that dump() can be called directly
			// instead of execute()
			$this->loadWithArgv( $args );
			$this->processOptions();
		}
	}

	/**
	 * Re-declare the --schema-version option once configuration is available,
	 * so that its description can name the configured default version.
	 *
	 * @param SettingsBuilder $settingsBuilder
	 */
	public function finalSetup( SettingsBuilder $settingsBuilder ) {
		parent::finalSetup( $settingsBuilder );
		$schemaVersion = $settingsBuilder->getConfig()->get( MainConfigNames::XmlDumpSchemaVersion );
		$this->addOption( 'schema-version', 'Schema version to use for output. ' .
			'Default: ' . $schemaVersion, false, true );
	}

	/**
	 * Make an output sink class selectable via --output=<name>:<file>.
	 *
	 * @param string $name
	 * @param string $class Name of output filter plugin class
	 */
	public function registerOutput( $name, $class ) {
		$this->outputTypes[$name] = $class;
	}

	/**
	 * Make a filter class selectable via --filter=<name>[:<options>].
	 *
	 * @param string $name
	 * @param string $class Name of filter plugin class
	 */
	public function registerFilter( $name, $class ) {
		$this->filterTypes[$name] = $class;
	}

	/**
	 * Load a plugin and register it
	 *
	 * @param string $class Name of plugin class; must have a static 'register'
	 *   method that takes a BackupDumper as a parameter.
	 * @param string $file Full or relative path to the PHP file to load, or empty
	 */
	public function loadPlugin( $class, $file ) {
		if ( $file != '' ) {
			// The path comes straight from the --plugin option and is executed as PHP.
			require_once $file;
		}
		$register = [ $class, 'register' ];
		$register( $this );
	}

	/**
	 * Processes arguments and sets $this->sink accordingly.
	 *
	 * Options are walked in command line order. Each --output starts a new
	 * sink (the previous one, if any, is queued), and each --filter wraps the
	 * sink that is current at that point. If more than one sink results they
	 * are combined in a DumpMultiWriter.
	 */
	protected function processOptions() {
		$sink = null;
		$sinks = [];

		$this->schemaVersion = WikiExporter::schemaVersion();

		$options = $this->orderedOptions;
		foreach ( $options as [ $opt, $param ] ) {
			switch ( $opt ) {
				case 'plugin':
					// <class>[:<file>]; a missing file part means "class is already loadable"
					$val = explode( ':', $param, 2 );
					$this->loadPlugin( $val[0], $val[1] ?? '' );

					break;
				case 'output':
					$split = explode( ':', $param, 2 );
					if ( count( $split ) !== 2 ) {
						$this->fatalError( 'Invalid output parameter' );
					}
					[ $type, $file ] = $split;
					if ( $sink !== null ) {
						// A new output begins; keep the finished one for the multi-writer
						$sinks[] = $sink;
					}
					if ( !isset( $this->outputTypes[$type] ) ) {
						$this->fatalError( "Unrecognized output sink type '$type'" );
					}
					$class = $this->outputTypes[$type];
					if ( $type === "7zip" ) {
						$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
					} else {
						$sink = new $class( $file );
					}

					break;
				case 'filter':
					if ( $sink === null ) {
						$sink = new DumpOutput();
					}

					$split = explode( ':', $param, 2 );
					$key = $split[0];

					if ( !isset( $this->filterTypes[$key] ) ) {
						$this->fatalError( "Unrecognized filter type '$key'" );
					}

					$type = $this->filterTypes[$key];

					if ( count( $split ) === 2 ) {
						$filter = new $type( $sink, $split[1] );
					} else {
						$filter = new $type( $sink );
					}

					// references are lame in php...
					unset( $sink );
					$sink = $filter;

					break;
				case 'schema-version':
					// Strict comparison: loosely compared, numeric strings such as
					// '0.1' and '0.10' are considered equal.
					if ( !in_array( $param, XmlDumpWriter::$supportedSchemas, true ) ) {
						$this->fatalError(
							"Unsupported schema version $param. Supported versions: " .
							implode( ', ', XmlDumpWriter::$supportedSchemas )
						);
					}
					$this->schemaVersion = $param;
					break;
			}
		}

		if ( $this->hasOption( 'report' ) ) {
			$this->reportingInterval = intval( $this->getOption( 'report' ) );
		}

		if ( $this->hasOption( 'server' ) ) {
			$this->server = $this->getOption( 'server' );
		}

		if ( $sink === null ) {
			$sink = new DumpOutput();
		}
		$sinks[] = $sink;

		if ( count( $sinks ) > 1 ) {
			$this->sink = new DumpMultiWriter( $sinks );
		} else {
			$this->sink = $sink;
		}
	}

	/**
	 * Run the export and write it to the configured sink.
	 *
	 * Depending on the object's state this dumps log items (when $history has
	 * the WikiExporter::LOGS bit set), a page ID range, a revision ID range,
	 * all pages, or the pages named in $this->pages.
	 *
	 * @param int $history History flags handed to the exporter; tested against
	 *   WikiExporter::LOGS here
	 * @param int $text Text mode handed to the exporter
	 */
	public function dump( $history, $text = WikiExporter::TEXT ) {
		# Notice messages will foul up your XML output even if they're
		# relatively harmless.
		if ( ini_get( 'display_errors' ) ) {
			ini_set( 'display_errors', 'stderr' );
		}

		$this->initProgress( $history );

		$db = $this->backupDb();
		$services = $this->getServiceContainer();
		$exporter = $services->getWikiExporterFactory()->getWikiExporter(
			$db,
			$history,
			$text,
			$this->limitNamespaces
		);
		$exporter->setSchemaVersion( $this->schemaVersion );
		$exporter->dumpUploads = $this->dumpUploads;
		$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

		// The wrapper is constructed with this object so progress can be tracked
		$wrapper = new ExportProgressFilter( $this->sink, $this );
		$exporter->setOutputSink( $wrapper );

		if ( !$this->skipHeader ) {
			$exporter->openStream();
		}
		# Log item dumps: all or by range
		if ( $history & WikiExporter::LOGS ) {
			if ( $this->startId || $this->endId ) {
				$exporter->logsByRange( $this->startId, $this->endId );
			} else {
				$exporter->allLogs();
			}
		} elseif ( $this->pages === null ) {
			# Page dumps: all or by page ID range
			if ( $this->startId || $this->endId ) {
				$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
			} elseif ( $this->revStartId || $this->revEndId ) {
				$exporter->revsByRange( $this->revStartId, $this->revEndId );
			} else {
				$exporter->allPages();
			}
		} else {
			# Dump of specific pages
			$exporter->pagesByName( $this->pages );
		}

		if ( !$this->skipFooter ) {
			$exporter->closeStream();
		}

		$this->report( true );
	}

	/**
	 * Initialise starting time and maximum revision count.
	 * We'll make ETA calculations based on progress, assuming relatively
	 * constant per-revision rate.
	 * @param int $history WikiExporter::CURRENT or WikiExporter::FULL
	 */
	public function initProgress( $history = WikiExporter::FULL ) {
		$table = ( $history == WikiExporter::CURRENT ) ? 'page' : 'revision';
		$field = ( $history == WikiExporter::CURRENT ) ? 'page_id' : 'rev_id';

		$dbr = $this->forcedDb;
		if ( $this->forcedDb === null ) {
			$dbr = $this->getDB( DB_REPLICA, [ 'dump' ] );
		}
		// The query result is not guaranteed to be an integer (MAX() over an
		// empty table has no value); normalise so later arithmetic is safe.
		$this->maxCount = (int)$dbr->newSelectQueryBuilder()
			->select( "MAX($field)" )
			->from( $table )
			->caller( __METHOD__ )->fetchField();
		$this->startTime = microtime( true );
		$this->lastTime = $this->startTime;
		$this->ID = getmypid();
	}

	/**
	 * Get the database connection to dump from: the injected one if set,
	 * otherwise a connection from a freshly created main load balancer.
	 *
	 * @todo Fixme: the --server parameter is currently not respected, as it
	 * doesn't seem terribly easy to ask the load balancer for a particular
	 * connection by name.
	 * @return IMaintainableDatabase
	 */
	protected function backupDb() {
		if ( $this->forcedDb !== null ) {
			return $this->forcedDb;
		}

		$lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();
		$this->lb = $lbFactory->newMainLB();
		$db = $this->lb->getMaintenanceConnectionRef( DB_REPLICA, 'dump' );

		// Discourage the server from disconnecting us if it takes a long time
		// to read out the big ol' batch query.
		$db->setSessionOptions( [ 'connTimeout' => 3600 * 24 ] );

		return $db;
	}

	/**
	 * Force the dump to use the provided database connection for database
	 * operations, wherever possible.
	 *
	 * @param IMaintainableDatabase $db The database connection to use
	 */
	public function setDB( IMaintainableDatabase $db ) {
		parent::setDB( $db );
		$this->forcedDb = $db;
	}

	/**
	 * Close the connections of the load balancer created by backupDb(), if any.
	 */
	public function __destruct() {
		if ( isset( $this->lb ) ) {
			$this->lb->closeAll( __METHOD__ );
		}
	}

	/**
	 * Get the database server name: the --server value if one was given and
	 * is non-empty, otherwise the globally configured $wgDBserver.
	 *
	 * @return string
	 */
	protected function backupServer() {
		global $wgDBserver;

		return $this->server ?: $wgDBserver;
	}

	/**
	 * Count one more processed page.
	 */
	public function reportPage() {
		$this->pageCount++;
	}

	/**
	 * Count one more processed revision and emit a progress report if one is due.
	 */
	public function revCount() {
		$this->revCount++;
		$this->report();
	}

	/**
	 * Emit a progress report when one is due.
	 *
	 * A regular call reports whenever the revision count is a multiple of the
	 * reporting interval. A final call reports only when it is not, so the
	 * last state is not printed twice. A non-positive interval disables the
	 * periodic reports and leaves only the final one.
	 *
	 * @param bool $final Whether this is the closing report of a dump
	 */
	public function report( $final = false ) {
		// Guard the modulo: --report=0 (or a non-numeric value) yields an
		// interval of 0, and "% 0" throws DivisionByZeroError.
		$atInterval = $this->reportingInterval > 0
			&& $this->revCount % $this->reportingInterval === 0;

		if ( $final xor $atInterval ) {
			$this->showReport();
		}
	}

	/**
	 * Write one progress line with overall and since-last-report page and
	 * revision rates and an estimated completion time, then remember the
	 * current counters as the baseline for the next report.
	 *
	 * Values that cannot be computed yet are shown as "-".
	 */
	public function showReport() {
		if ( !$this->reporting ) {
			return;
		}

		$now = wfTimestamp( TS_DB );
		$nowts = microtime( true );
		$deltaAll = $nowts - $this->startTime;
		$deltaPart = $nowts - $this->lastTime;
		$this->pageCountPart = $this->pageCount - $this->pageCountLast;
		$this->revCountPart = $this->revCount - $this->revCountLast;

		$pageRate = $this->formatRate( $this->pageCount, $deltaAll );
		$revRate = $this->formatRate( $this->revCount, $deltaAll );
		$pageRatePart = $this->formatRate( $this->pageCountPart, $deltaPart );
		$revRatePart = $this->formatRate( $this->revCountPart, $deltaPart );

		// The ETA extrapolates from the share of revisions done so far. It is
		// undefined (and would divide by zero) before the first revision or
		// when the maximum ID is unknown.
		$etats = '-';
		if ( $deltaAll && $this->revCount > 0 && $this->maxCount > 0 ) {
			$portion = $this->revCount / $this->maxCount;
			$eta = $this->startTime + $deltaAll / $portion;
			$etats = wfTimestamp( TS_DB, intval( $eta ) );
		}

		$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
		// Rates are pre-formatted strings so that the "-" placeholder survives;
		// fed to %0.1f it would be printed as 0.0.
		$this->progress( sprintf(
			"%s: %s (ID %d) %d pages (%s|%s/sec all|curr), "
				. "%d revs (%s|%s/sec all|curr), ETA %s [max %d]",
			$now, $dbDomain, $this->ID, $this->pageCount, $pageRate,
			$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
			$this->maxCount
		) );
		$this->lastTime = $nowts;
		$this->revCountLast = $this->revCount;
		// Without this the "curr" page rate would always be measured from zero.
		$this->pageCountLast = $this->pageCount;
	}

	/**
	 * Format an items-per-second rate with one decimal place.
	 *
	 * @param int $count Number of items processed
	 * @param float $elapsed Seconds over which they were processed
	 * @return string The rate, or "-" when no time has elapsed
	 */
	private function formatRate( $count, $elapsed ) {
		return $elapsed ? sprintf( '%0.1f', $count / $elapsed ) : '-';
	}

	/**
	 * Write a line to stderr, unless reporting is disabled or stderr could
	 * not be opened.
	 *
	 * @param string $string Text to write; a newline is appended
	 */
	protected function progress( $string ) {
		if ( $this->reporting && is_resource( $this->stderr ) ) {
			fwrite( $this->stderr, $string . "\n" );
		}
	}
}