MediaWiki master
BackupDumper.php
Go to the documentation of this file.
1<?php
14namespace MediaWiki\Maintenance;
15
16// @codeCoverageIgnoreStart
17require_once __DIR__ . '/../Maintenance.php';
18require_once __DIR__ . '/../../includes/Export/WikiExporter.php';
19// @codeCoverageIgnoreEnd
20
21use DumpFilter;
23use DumpOutput;
28use WikiExporter;
31use Wikimedia\Timestamp\ConvertibleTimestamp;
32use Wikimedia\Timestamp\TimestampFormat as TS;
34
39abstract class BackupDumper extends Maintenance {
/** @var bool Whether progress is reported; checked by showReport() and progress() */
public $reporting = true;
/** @var string[]|null null means all pages */
public $pages = null;
/** @var bool don't output <mediawiki> and <siteinfo> */
public $skipHeader = false;
/** @var bool don't output </mediawiki> */
public $skipFooter = false;
/** @var int Start of the ID range passed to pagesByRange()/logsByRange(); 0 together with $endId = 0 means no range */
public $startId = 0;
/** @var int End of the ID range passed to pagesByRange()/logsByRange() */
public $endId = 0;
/** @var int Start of the revision ID range passed to revsByRange() */
public $revStartId = 0;
/** @var int End of the revision ID range passed to revsByRange() */
public $revEndId = 0;
/** @var bool Copied onto the exporter's dumpUploads property by dump() */
public $dumpUploads = false;
/**
 * Copied onto the exporter's dumpUploadFileContents property by dump().
 * Declared explicitly so that dump() never reads an undefined property.
 * @var bool
 */
public $dumpUploadFileContents = false;
/** @var bool Passed as the third argument to pagesByRange() by dump() */
public $orderRevs = false;
/** @var array Namespace restriction handed to the exporter factory by dump() */
public $limitNamespaces = [];
/** @var resource|false Handle opened on php://stderr by the constructor; progress() writes to it */
public $stderr;

/** @var int report() shows a report whenever the revision count is a multiple of this */
protected $reportingInterval = 100;
/** @var int Incremented by reportPage() */
protected $pageCount = 0;
/** @var int Incremented by revCount() */
protected $revCount = 0;
/** @var string|null null means use default */
protected $schemaVersion = null;
/** @var DumpMultiWriter|DumpOutput|null Output filters */
protected $sink = null;
/** @var float microtime() of the previous report (or of initProgress()) */
protected $lastTime = 0;
/** @var int Page count baseline used by showReport() for the "curr" page rate */
protected $pageCountLast = 0;
/** @var int Revision count at the time of the previous report */
protected $revCountLast = 0;

/** @var array<string,class-string<DumpOutput>> Registered output sink classes, keyed by type name */
protected $outputTypes = [];
/** @var array<string,class-string<DumpFilter>> Registered filter classes, keyed by type name */
protected $filterTypes = [];

/** @var int|false Set to getmypid() by initProgress(); shown in progress reports */
protected $ID = 0;

/** @var float microtime() taken by initProgress() */
protected $startTime;
/** @var int Pages counted since $pageCountLast; computed by showReport() */
protected $pageCountPart;
/** @var int Revisions counted since the previous report; computed by showReport() */
protected $revCountPart;
/** @var mixed MAX(page_id) or MAX(rev_id) as fetched by initProgress() */
protected $maxCount;
// NOTE(review): the eight properties below are not referenced anywhere in this
// class; presumably they are state for subclasses - confirm before removing.
protected $egress;
protected $buffer;
protected $openElement;
protected $atStart;
protected $thisRevModel;
protected $thisRevFormat;
protected $lastName;
protected $state;

/** @var IMaintainableDatabase|null The dependency-injected database to use; see setDB() */
protected $forcedDb = null;
127
/**
 * Open the stderr handle, register the built-in sinks and filters, and declare
 * the command-line options shared by all dumpers.
 *
 * @param array|null $args Command-line arguments. When truthy they are loaded and
 *   processed right away so that dump() can be called directly instead of execute().
 */
public function __construct( $args = null ) {
	parent::__construct();
	$this->stderr = fopen( "php://stderr", "wt" );

	// Built-in output sinks, keyed by the <type> accepted by --output
	$builtinOutputs = [
		'file' => \DumpFileOutput::class,
		'gzip' => \DumpGZipOutput::class,
		'bzip2' => \DumpBZip2Output::class,
		'dbzip2' => \DumpDBZip2Output::class,
		'lbzip2' => \DumpLBZip2Output::class,
		'7zip' => \Dump7ZipOutput::class,
	];
	foreach ( $builtinOutputs as $typeName => $sinkClass ) {
		$this->registerOutput( $typeName, $sinkClass );
	}

	// Built-in filters, keyed by the <type> accepted by --filter
	$builtinFilters = [
		'latest' => \DumpLatestFilter::class,
		'notalk' => \DumpNotalkFilter::class,
		'namespace' => \DumpNamespaceFilter::class,
	];
	foreach ( $builtinFilters as $typeName => $filterClass ) {
		$this->registerFilter( $typeName, $filterClass );
	}

	// --plugin, --output and --filter may each be given several times
	$this->addOption(
		'plugin',
		'Load a dump plugin class. Specify as <class>[:<file>].',
		false, true, false, true
	);
	$this->addOption(
		'output',
		'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2, lbzip2',
		false, true, 'o', true
	);
	$this->addOption(
		'filter',
		'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace',
		false, true, false, true
	);

	// Single-occurrence options
	$this->addOption(
		'report',
		'Report position and speed after every n pages processed. ' .
			'Default: 100.',
		false, true
	);
	$this->addOption(
		'7ziplevel',
		'7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.',
		false, true
	);
	// The default schema version is not known yet because configuration has not
	// been loaded when this constructor runs; finalSetup() re-declares this
	// option with the default included in its description.
	$this->addOption( 'schema-version', 'Schema version to use for output.', false, true );

	if ( $args ) {
		// Load and process the arguments now, so that callers can invoke dump()
		// without going through execute()
		$this->loadWithArgv( $args );
		$this->processOptions();
	}
}
170
/**
 * Handle last-minute setup: re-declare --schema-version so that its help text
 * names the configured default, which is not yet known in the constructor.
 *
 * @param SettingsBuilder $settingsBuilder
 */
public function finalSetup( SettingsBuilder $settingsBuilder ) {
	parent::finalSetup( $settingsBuilder );

	// Read the default from the configuration loaded so far. Without this
	// assignment the description would interpolate an undefined variable.
	$schemaVersion = $settingsBuilder->getConfig()->get(
		\MediaWiki\MainConfigNames::XmlDumpSchemaVersion
	);

	// Re-declare the --schema-version option to include the default schema
	// version in the description.
	$this->addOption( 'schema-version', 'Schema version to use for output. ' .
		'Default: ' . $schemaVersion, false, true );
}
179
/**
 * Make an output sink class available under a type name for --output.
 * Registering an existing name replaces the previous class.
 *
 * @param string $name Type name, as used in "--output <type>:<file>"
 * @param string $class Name of the sink class to instantiate for that type
 */
public function registerOutput( $name, $class ) {
	$this->outputTypes[$name] = $class;
}
187
/**
 * Make a filter class available under a type name for --filter.
 * Registering an existing name replaces the previous class.
 *
 * @param string $name Type name, as used in "--filter <type>[:<options>]"
 * @param string $class Name of the filter class to instantiate for that type
 */
public function registerFilter( $name, $class ) {
	$this->filterTypes[$name] = $class;
}
195
/**
 * Load a plugin and register it.
 *
 * The plugin class must provide a static register() method, which is called
 * with this dumper as its only argument.
 *
 * @param string $class Name of the plugin class
 * @param string $file File to include first; an empty value skips the include
 */
public function loadPlugin( $class, $file ) {
	// Loose comparison on purpose: any value equal to '' means "no file"
	$needsInclude = ( $file != '' );
	if ( $needsInclude ) {
		require_once $file;
	}

	$class::register( $this );
}
210
/**
 * Processes arguments and sets $this->sink accordingly.
 *
 * Options are handled in the order they were given on the command line:
 * each --output starts a new sink, and each --filter wraps the sink that is
 * current at that point. A --filter given before any --output wraps a default
 * DumpOutput, and a default DumpOutput is also used when no sink was
 * requested at all. Several sinks are combined into one DumpMultiWriter.
 *
 * Also sets $this->schemaVersion (default from WikiExporter::schemaVersion(),
 * overridden by --schema-version) and $this->reportingInterval (--report).
 */
protected function processOptions() {
	$sink = null;
	$sinks = [];

	$this->schemaVersion = WikiExporter::schemaVersion();

	foreach ( $this->orderedOptions as [ $opt, $param ] ) {
		switch ( $opt ) {
			case 'plugin':
				// <class>[:<file>]; explode() with a limit of 2 yields one or two parts
				$val = explode( ':', $param, 2 );
				$this->loadPlugin( $val[0], $val[1] ?? '' );

				break;
			case 'output':
				$split = explode( ':', $param, 2 );
				if ( count( $split ) !== 2 ) {
					$this->fatalError( 'Invalid output parameter' );
				}
				[ $type, $file ] = $split;
				if ( $sink !== null ) {
					// A new output begins: the previous sink chain is complete
					$sinks[] = $sink;
				}
				if ( !isset( $this->outputTypes[$type] ) ) {
					$this->fatalError( "Unrecognized output sink type '$type'" );
				}
				$class = $this->outputTypes[$type];
				if ( $type === "7zip" ) {
					// Only the 7zip sink takes the compression level as a second argument
					$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
				} else {
					$sink = new $class( $file );
				}

				break;
			case 'filter':
				$sink ??= new DumpOutput();

				$split = explode( ':', $param, 2 );
				$key = $split[0];

				if ( !isset( $this->filterTypes[$key] ) ) {
					$this->fatalError( "Unrecognized filter type '$key'" );
				}

				$type = $this->filterTypes[$key];

				if ( count( $split ) === 2 ) {
					$filter = new $type( $sink, $split[1] );
				} else {
					$filter = new $type( $sink );
				}

				// The filter wraps the previous sink and becomes the current one
				$sink = $filter;

				break;
			case 'schema-version':
				// Strict comparison: a loose in_array() compares numeric strings
				// numerically, so e.g. "0.1" would be accepted for "0.10".
				// NOTE(review): assumes the supported versions are listed as
				// strings - confirm against XmlDumpWriter::$supportedSchemas.
				if ( !in_array( $param, XmlDumpWriter::$supportedSchemas, true ) ) {
					$this->fatalError(
						"Unsupported schema version $param. Supported versions: " .
						implode( ', ', XmlDumpWriter::$supportedSchemas )
					);
				}
				$this->schemaVersion = $param;
				break;
		}
	}

	if ( $this->hasOption( 'report' ) ) {
		$this->reportingInterval = intval( $this->getOption( 'report' ) );
	}

	$sink ??= new DumpOutput();
	$sinks[] = $sink;

	if ( count( $sinks ) > 1 ) {
		$this->sink = new DumpMultiWriter( $sinks );
	} else {
		$this->sink = $sink;
	}
}
301
/**
 * Run the dump: build an exporter, route its output through the configured
 * sink, and export logs or pages according to the range/page properties.
 *
 * @param int $history Exporter history mode; tested as a bit field against
 *   WikiExporter::LOGS to choose between log and page dumps
 * @param int $text Text mode passed through to the exporter
 */
public function dump( $history, $text = WikiExporter::TEXT ) {
	// Notice messages will foul up the XML output even if they are relatively
	// harmless, so send displayed errors to stderr instead.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $history );

	$exporter = $this->getServiceContainer()
		->getWikiExporterFactory()
		->getWikiExporter(
			$this->getBackupDatabase(),
			$history,
			$text,
			$this->limitNamespaces
		);
	$exporter->setSchemaVersion( $this->schemaVersion );
	$exporter->dumpUploads = $this->dumpUploads;
	$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

	// Wrap the sink so that this dumper is passed along for progress counting
	$exporter->setOutputSink( new ExportProgressFilter( $this->sink, $this ) );

	if ( !$this->skipHeader ) {
		$exporter->openStream();
	}

	$hasIdRange = $this->startId || $this->endId;
	if ( $history & WikiExporter::LOGS ) {
		// Log item dumps: all or by range
		if ( $hasIdRange ) {
			$exporter->logsByRange( $this->startId, $this->endId );
		} else {
			$exporter->allLogs();
		}
	} elseif ( $this->pages !== null ) {
		// Dump of specific pages
		$exporter->pagesByName( $this->pages );
	} elseif ( $hasIdRange ) {
		// Page dumps by page ID range
		$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
	} elseif ( $this->revStartId || $this->revEndId ) {
		// Page dumps by revision ID range
		$exporter->revsByRange( $this->revStartId, $this->revEndId );
	} else {
		$exporter->allPages();
	}

	if ( !$this->skipFooter ) {
		$exporter->closeStream();
	}

	$this->report( true );
}
359
/**
 * Initialise starting time and maximum revision count.
 *
 * The maximum is MAX(page_id) when $history equals WikiExporter::CURRENT and
 * MAX(rev_id) otherwise. Also records the process ID used in progress reports.
 *
 * @param int $history Exporter history mode
 */
public function initProgress( $history = WikiExporter::FULL ) {
	if ( $history == WikiExporter::CURRENT ) {
		$table = 'page';
		$field = 'page_id';
	} else {
		$table = 'revision';
		$field = 'rev_id';
	}

	// Prefer the injected connection; otherwise use a replica from the 'dump' group
	$dbr = $this->forcedDb ?? $this->getDB( DB_REPLICA, [ 'dump' ] );

	$this->maxCount = $dbr->newSelectQueryBuilder()
		->select( "MAX($field)" )
		->from( $table )
		->caller( __METHOD__ )
		->fetchField();

	$startedAt = microtime( true );
	$this->startTime = $startedAt;
	$this->lastTime = $startedAt;
	$this->ID = getmypid();
}
382
/**
 * Get the database connection the exporter should read from.
 *
 * Returns the connection injected through setDB() if there is one. Otherwise
 * a replica connection from the 'dump' group is obtained and its connTimeout
 * session option is raised to 24 hours.
 *
 * @return IMaintainableDatabase
 */
protected function getBackupDatabase() {
	$db = $this->forcedDb;

	if ( $db === null ) {
		$db = $this->getServiceContainer()
			->getDBLoadBalancerFactory()
			->getMainLB()
			->getConnection( DB_REPLICA, 'dump' );

		// Discourage the server from disconnecting us if it takes a long time
		// to read out the big batch query.
		$oneDayInSeconds = 3600 * 24;
		$db->setSessionOptions( [ 'connTimeout' => $oneDayInSeconds ] );
	}

	return $db;
}
402
/**
 * Force the dump to use the provided database connection for database
 * operations, wherever possible.
 *
 * The connection is handed to the parent class first and then remembered in
 * $forcedDb, which initProgress() and getBackupDatabase() consult.
 *
 * @param IMaintainableDatabase $db
 */
public function setDB( IMaintainableDatabase $db ) {
	parent::setDB( $db );

	$this->forcedDb = $db;
}
413
/**
 * Count one more page. Does not trigger a progress report by itself.
 */
public function reportPage() {
	$this->pageCount += 1;
}
417
/**
 * Count one more revision and let report() decide whether a progress
 * report is due.
 */
public function revCount() {
	$this->revCount += 1;
	$this->report( false );
}
422
/**
 * Show a progress report when one is due.
 *
 * A periodic call ($final = false) reports whenever the revision count is a
 * multiple of the reporting interval. The final call reports only when the
 * count is NOT such a multiple, because otherwise the last periodic call has
 * already reported that state.
 *
 * A reporting interval of 0 (for example from "--report 0" or a non-numeric
 * value) disables periodic reports instead of raising a modulo-by-zero error;
 * the final report is then always shown.
 *
 * @param bool $final Whether this is the closing report at the end of the dump
 */
public function report( bool $final = false ) {
	$interval = (int)$this->reportingInterval;
	$onInterval = $interval !== 0 && $this->revCount % $interval === 0;

	if ( $final xor $onInterval ) {
		$this->showReport();
	}
}
428
/**
 * Write one progress line: page and revision totals, overall and
 * since-last-report rates, and an estimated completion time.
 *
 * Does nothing when reporting is disabled. Rates that cannot be computed
 * because no time has elapsed, and an ETA that cannot be computed because the
 * revision count or the known maximum is zero or unset, are passed as '-'.
 */
public function showReport() {
	if ( !$this->reporting ) {
		return;
	}

	$now = ConvertibleTimestamp::now( TS::DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	if ( $deltaAll ) {
		$pageRate = $this->pageCount / $deltaAll;
		$revRate = $this->revCount / $deltaAll;
	} else {
		$pageRate = '-';
		$revRate = '-';
	}

	// The ETA extrapolates from the fraction of the maximum ID reached so far.
	// Both divisions are guarded: maxCount is zero/null for an empty table or
	// before initProgress() has run, and the portion is zero before the first
	// revision has been counted.
	$etats = '-';
	if ( $deltaAll && $this->revCount > 0 && $this->maxCount > 0 ) {
		$portion = $this->revCount / $this->maxCount;
		$eta = $this->startTime + $deltaAll / $portion;
		$etats = wfTimestamp( TS::DB, intval( $eta ) );
	}

	if ( $deltaPart ) {
		$pageRatePart = $this->pageCountPart / $deltaPart;
		$revRatePart = $this->revCountPart / $deltaPart;
	} else {
		$pageRatePart = '-';
		$revRatePart = '-';
	}

	$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
			. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
		$now, $dbDomain, $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
		$this->maxCount
	) );

	// Move every baseline forward so that the next report's "curr" figures
	// cover only the interval since this report. The page baseline must be
	// advanced together with the revision baseline, otherwise the "curr" page
	// rate is computed from the running total.
	$this->lastTime = $nowts;
	$this->pageCountLast = $this->pageCount;
	$this->revCountLast = $this->revCount;
}
469
/**
 * Write one line of progress output to the stderr handle, followed by a
 * newline. Does nothing when reporting is disabled.
 *
 * @param string $string Text to write, without a trailing newline
 */
protected function progress( string $string ) {
	if ( !$this->reporting ) {
		return;
	}

	fwrite( $this->stderr, $string . "\n" );
}
475}
476
// Also make the class available under the un-namespaced name 'BackupDumper'.
class_alias( BackupDumper::class, 'BackupDumper' );
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
const DB_REPLICA
Definition defines.php:26
A class containing constants representing the names of configuration variables.
const XmlDumpSchemaVersion
Name constant for the XmlDumpSchemaVersion setting, for use with Config::get()
bool $skipHeader
don't output <mediawiki> and <siteinfo>
IMaintainableDatabase|null $forcedDb
The dependency-injected database to use.
finalSetup(SettingsBuilder $settingsBuilder)
Handle some last-minute setup here.
dump( $history, $text=WikiExporter::TEXT)
array< string, class-string< DumpOutput > > $outputTypes
string|null $schemaVersion
null means use default
string[]|null $pages
null means all pages
processOptions()
Processes arguments and sets $this->sink accordingly.
bool $skipFooter
don't output </mediawiki>
setDB(IMaintainableDatabase $db)
Force the dump to use the provided database connection for database operations, wherever possible.
array< string, class-string< DumpFilter > > $filterTypes
DumpMultiWriter|DumpOutput|null $sink
Output filters.
initProgress( $history=WikiExporter::FULL)
Initialise starting time and maximum revision count.
loadPlugin( $class, $file)
Load a plugin and register it.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
array $orderedOptions
Used to read the options in the order they were passed.
loadWithArgv( $argv)
Load params and arguments from a given array of command-line arguments.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
Builder class for constructing a Config object from a set of sources during bootstrap.
getConfig()
Returns the config loaded so far.
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:19
Interface to a relational database.
Definition IDatabase.php:31
setSessionOptions(array $options)
Override database's default behavior.
Advanced database interface for IDatabase handles that include maintenance methods.
Update the CREDITS list by merging in the list of git commit authors.