MediaWiki  master
BackupDumper.php
Go to the documentation of this file.
1 <?php
28 require_once __DIR__ . '/../Maintenance.php';
29 require_once __DIR__ . '/../../includes/export/WikiExporter.php';
30 
36 
41 abstract class BackupDumper extends Maintenance {
/** @var bool Whether progress()/showReport() write anything to stderr */
public $reporting = true;

/** @var string[]|null Page names to dump; null means all pages */
public $pages = null;

/** @var bool Don't output <mediawiki> and <siteinfo> */
public $skipHeader = false;

/** @var bool Don't output </mediawiki> */
public $skipFooter = false;

/** @var int Lower bound of the ID range handed to logsByRange()/pagesByRange(); 0 means unset */
public $startId = 0;

/** @var int Upper bound of the ID range handed to logsByRange()/pagesByRange(); 0 means unset */
public $endId = 0;

/** @var int Lower bound of the revision ID range handed to revsByRange(); 0 means unset */
public $revStartId = 0;

/** @var int Upper bound of the revision ID range handed to revsByRange(); 0 means unset */
public $revEndId = 0;

/** @var bool Copied onto the exporter's dumpUploads flag in dump() */
public $dumpUploads = false;

/** @var bool Copied onto the exporter's dumpUploadFileContents flag in dump() */
public $dumpUploadFileContents = false;

/** @var bool Passed as the third argument of pagesByRange() */
public $orderRevs = false;

/** @var array|null Namespace restriction passed to the exporter factory */
public $limitNamespaces = [];

/** @var resource|false Stream opened on php://stderr; target of progress() */
public $stderr;

/** @var int Number of revisions between two periodic progress reports */
protected $reportingInterval = 100;

/** @var int Pages counted so far via reportPage() */
protected $pageCount = 0;

/** @var int Revisions counted so far via revCount() */
protected $revCount = 0;

/** @var string|null Export schema version; null means use default */
protected $schemaVersion = null;

/** @var string|null Database server override; null means use default */
protected $server = null;

/** @var DumpMultiWriter|DumpOutput|null Output filters */
protected $sink = null;

/** @var float Time (microtime) of the previous progress report */
protected $lastTime = 0;

/** @var int Value of $pageCount at the previous progress report */
protected $pageCountLast = 0;

/** @var int Value of $revCount at the previous progress report */
protected $revCountLast = 0;

/** @var string[] Output sink classes, keyed by the type name used in --output */
protected $outputTypes = [];

/** @var string[] Filter classes, keyed by the type name used in --filter */
protected $filterTypes = [];

/** @var int Process ID shown in progress reports; set by initProgress() */
protected $ID = 0;

/** @var float Time (microtime) at which initProgress() ran */
protected $startTime;

/** @var int Pages counted since the previous progress report */
protected $pageCountPart;

/** @var int Revisions counted since the previous progress report */
protected $revCountPart;

/** @var int|string|null Highest page or revision ID, as read by initProgress() */
protected $maxCount;

/** @var float Not used by this class; presumably for subclasses (TODO confirm) */
protected $timeOfCheckpoint;

/** @var ExportProgressFilter Not used by this class; presumably for subclasses (TODO confirm) */
protected $egress;

/** @var mixed Not used by this class; presumably for subclasses (TODO confirm) */
protected $buffer;

/** @var array|false Not used by this class; presumably for subclasses (TODO confirm) */
protected $openElement;

/** @var mixed Not used by this class; presumably for subclasses (TODO confirm) */
protected $atStart;

/** @var string|null Not used by this class; presumably for subclasses (TODO confirm) */
protected $thisRevModel;

/** @var string|null Not used by this class; presumably for subclasses (TODO confirm) */
protected $thisRevFormat;

/** @var string Not used by this class; presumably for subclasses (TODO confirm) */
protected $lastName;

/** @var mixed Not used by this class; presumably for subclasses (TODO confirm) */
protected $state;

/** @var IMaintainableDatabase|null The dependency-injected database to use; see setDB() */
protected $forcedDb = null;

/** @var LoadBalancer Load balancer created by backupDb(); closed in __destruct() */
protected $lb;
134 
/**
 * Register the built-in output sinks and filters and declare the
 * command line options shared by all dump scripts.
 *
 * @param array|null $args Command line arguments. When given, they are
 *  loaded and processed immediately so that dump() can be called
 *  directly instead of execute().
 */
public function __construct( $args = null ) {
	parent::__construct();
	$this->stderr = fopen( "php://stderr", "wt" );

	// Built-in output plugins, keyed by the <type> accepted by --output
	$builtinOutputs = [
		'file' => DumpFileOutput::class,
		'gzip' => DumpGZipOutput::class,
		'bzip2' => DumpBZip2Output::class,
		'dbzip2' => DumpDBZip2Output::class,
		'lbzip2' => DumpLBZip2Output::class,
		'7zip' => Dump7ZipOutput::class,
	];
	foreach ( $builtinOutputs as $outputName => $outputClass ) {
		$this->registerOutput( $outputName, $outputClass );
	}

	// Built-in filter plugins, keyed by the <type> accepted by --filter
	$builtinFilters = [
		'latest' => DumpLatestFilter::class,
		'notalk' => DumpNotalkFilter::class,
		'namespace' => DumpNamespaceFilter::class,
	];
	foreach ( $builtinFilters as $filterName => $filterClass ) {
		$this->registerFilter( $filterName, $filterClass );
	}

	// --plugin, --output and --filter may each be given several times
	$this->addOption(
		'plugin',
		'Load a dump plugin class. Specify as <class>[:<file>].',
		false, true, false, true
	);
	$this->addOption(
		'output',
		'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2, lbzip2',
		false, true, 'o', true
	);
	$this->addOption(
		'filter',
		'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace',
		false, true, false, true
	);

	// Single-occurrence options
	$this->addOption(
		'report',
		'Report position and speed after every n pages processed. ' .
			'Default: 100.',
		false, true
	);
	$this->addOption( 'server', 'Force reading from MySQL server', false, true );
	$this->addOption(
		'7ziplevel',
		'7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.',
		false, true
	);
	// The default schema version is not known yet because configuration has
	// not been loaded when this constructor runs. The option is therefore
	// declared again, with the default in its description, in finalSetup().
	$this->addOption( 'schema-version', 'Schema version to use for output.', false, true );

	if ( $args ) {
		$this->loadWithArgv( $args );
		$this->processOptions();
	}
}
178 
/**
 * Handle some last-minute setup: once configuration is available,
 * re-declare the --schema-version option so that its description
 * names the configured default schema version.
 *
 * @param SettingsBuilder|null $settingsBuilder Forwarded unchanged to the
 *  parent implementation. When null, the option keeps the generic
 *  description registered by the constructor.
 */
public function finalSetup( SettingsBuilder $settingsBuilder = null ) {
	parent::finalSetup( $settingsBuilder );

	// The signature allows null, so dereferencing unconditionally would be
	// a fatal error. Only the help text depends on the builder here, so
	// skipping the re-declaration is harmless.
	if ( $settingsBuilder === null ) {
		return;
	}

	$schemaVersion = $settingsBuilder->getConfig()->get( MainConfigNames::XmlDumpSchemaVersion );
	$this->addOption( 'schema-version', 'Schema version to use for output. ' .
		'Default: ' . $schemaVersion, false, true );
}
187 
/**
 * Make an output sink class available under a type name for --output.
 * An existing registration under the same name is replaced.
 *
 * @param string $name Type name as used on the command line
 * @param string $class Name of the output sink class
 */
public function registerOutput( $name, $class ) {
	$this->outputTypes[$name] = $class;
}
195 
/**
 * Make a filter class available under a type name for --filter.
 * An existing registration under the same name is replaced.
 *
 * @param string $name Type name as used on the command line
 * @param string $class Name of the filter class
 */
public function registerFilter( $name, $class ) {
	$this->filterTypes[$name] = $class;
}
203 
/**
 * Load a plugin and register it.
 *
 * The plugin class must expose a static register() method; it is called
 * with this dumper so the plugin can add its own outputs and filters.
 *
 * @param string $class Name of the plugin class
 * @param string $file File to include before registering; an empty
 *  string skips the include
 */
public function loadPlugin( $class, $file ) {
	if ( $file != '' ) {
		require_once $file;
	}

	call_user_func( [ $class, 'register' ], $this );
}
218 
/**
 * Do the actual work. This base class has nothing to run, so concrete
 * dump scripts must provide their own implementation.
 *
 * @throws MWException Always, when not overridden
 */
public function execute() {
	$message = 'execute() must be overridden in subclasses';

	throw new MWException( $message );
}
222 
/**
 * Processes arguments and sets $this->sink accordingly.
 *
 * Options are walked in the order they were given because --output and
 * --filter build a chain: each --output starts a new sink, and each
 * following --filter wraps the sink that is current at that point. When
 * more than one sink results, they are combined in a DumpMultiWriter.
 *
 * Also sets $this->schemaVersion (default or --schema-version),
 * $this->reportingInterval (--report) and $this->server (--server).
 * Invalid values end the script through fatalError().
 */
protected function processOptions() {
	$sink = null;
	$sinks = [];

	$this->schemaVersion = WikiExporter::schemaVersion();

	$options = $this->orderedOptions;
	foreach ( $options as [ $opt, $param ] ) {
		switch ( $opt ) {
			case 'plugin':
				// <class>[:<file>]; explode() with a limit of 2 yields one
				// or two parts, so a missing file becomes ''.
				$val = explode( ':', $param, 2 );
				$this->loadPlugin( $val[0], $val[1] ?? '' );

				break;
			case 'output':
				$split = explode( ':', $param, 2 );
				if ( count( $split ) !== 2 ) {
					$this->fatalError( 'Invalid output parameter' );
				}
				[ $type, $file ] = $split;
				// A new output ends the previous branch; keep what was built.
				if ( $sink !== null ) {
					$sinks[] = $sink;
				}
				if ( !isset( $this->outputTypes[$type] ) ) {
					$this->fatalError( "Unrecognized output sink type '$type'" );
				}
				$class = $this->outputTypes[$type];
				if ( $type === "7zip" ) {
					$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
				} else {
					$sink = new $class( $file );
				}

				break;
			case 'filter':
				// A filter given before any --output wraps the default sink.
				if ( $sink === null ) {
					$sink = new DumpOutput();
				}

				$split = explode( ':', $param, 2 );
				$key = $split[0];

				if ( !isset( $this->filterTypes[$key] ) ) {
					$this->fatalError( "Unrecognized filter type '$key'" );
				}

				$type = $this->filterTypes[$key];

				if ( count( $split ) === 2 ) {
					$filter = new $type( $sink, $split[1] );
				} else {
					$filter = new $type( $sink );
				}

				// references are lame in php...
				unset( $sink );
				$sink = $filter;

				break;
			case 'schema-version':
				// Strict comparison is required: a loose in_array() compares
				// numeric strings as numbers, so '0.1' or '0.100' would pass
				// as '0.10' and then be stored as the schema version.
				if ( !in_array( (string)$param, XmlDumpWriter::$supportedSchemas, true ) ) {
					$this->fatalError(
						"Unsupported schema version $param. Supported versions: " .
						implode( ', ', XmlDumpWriter::$supportedSchemas )
					);
				}
				$this->schemaVersion = $param;
				break;
		}
	}

	if ( $this->hasOption( 'report' ) ) {
		$this->reportingInterval = intval( $this->getOption( 'report' ) );
	}

	if ( $this->hasOption( 'server' ) ) {
		$this->server = $this->getOption( 'server' );
	}

	if ( $sink === null ) {
		$sink = new DumpOutput();
	}
	$sinks[] = $sink;

	if ( count( $sinks ) > 1 ) {
		$this->sink = new DumpMultiWriter( $sinks );
	} else {
		$this->sink = $sink;
	}
}
321 
/**
 * Run the export and write it to the configured sink.
 *
 * What gets exported depends on the state of this object: log items when
 * $history has the WikiExporter::LOGS bit set, the pages named in
 * $this->pages when that is not null, and otherwise pages selected by
 * page ID range, by revision ID range, or all pages.
 *
 * @param int $history History mode, passed to the exporter
 * @param int $text Text mode, passed to the exporter
 */
public function dump( $history, $text = WikiExporter::TEXT ) {
	// Notice messages will foul up the XML output even if they are
	// relatively harmless, so send displayed errors to stderr instead.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $history );

	$dumpDb = $this->backupDb();
	$exporterFactory = MediaWikiServices::getInstance()->getWikiExporterFactory();
	$exporter = $exporterFactory->getWikiExporter( $dumpDb, $history, $text, $this->limitNamespaces );
	$exporter->setSchemaVersion( $this->schemaVersion );
	$exporter->dumpUploads = $this->dumpUploads;
	$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

	// Route the output through a progress filter so this object gets
	// notified while the export runs.
	$exporter->setOutputSink( new ExportProgressFilter( $this->sink, $this ) );

	if ( !$this->skipHeader ) {
		$exporter->openStream();
	}

	$hasIdRange = $this->startId || $this->endId;
	if ( $history & WikiExporter::LOGS ) {
		// Log item dumps: all or by range
		if ( $hasIdRange ) {
			$exporter->logsByRange( $this->startId, $this->endId );
		} else {
			$exporter->allLogs();
		}
	} elseif ( $this->pages !== null ) {
		// Dump of specific pages
		$exporter->pagesByName( $this->pages );
	} elseif ( $hasIdRange ) {
		// Page dumps by page ID range
		$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
	} elseif ( $this->revStartId || $this->revEndId ) {
		// Page dumps by revision ID range
		$exporter->revsByRange( $this->revStartId, $this->revEndId );
	} else {
		$exporter->allPages();
	}

	if ( !$this->skipFooter ) {
		$exporter->closeStream();
	}

	$this->report( true );
}
376 
/**
 * Initialise starting time and maximum revision count.
 *
 * The maximum is MAX(page_id) when $history equals WikiExporter::CURRENT
 * and MAX(rev_id) otherwise; showReport() uses it to estimate the ETA.
 *
 * @param int $history WikiExporter history mode
 */
public function initProgress( $history = WikiExporter::FULL ) {
	$table = ( $history == WikiExporter::CURRENT ) ? 'page' : 'revision';
	$field = ( $history == WikiExporter::CURRENT ) ? 'page_id' : 'rev_id';

	// Prefer the injected connection (see setDB()). Previously $dbr was only
	// assigned when no connection had been injected, so an injected one led
	// to a method call on an undefined variable.
	$dbr = $this->forcedDb ?? $this->getDB( DB_REPLICA, [ 'dump' ] );

	$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );
	$this->startTime = microtime( true );
	$this->lastTime = $this->startTime;
	$this->ID = getmypid();
}
396 
/**
 * Get the database connection to read the dump from.
 *
 * A connection injected through setDB() is returned as is. Otherwise a
 * new main load balancer is created (and remembered in $this->lb so that
 * __destruct() can close it) and a replica connection in the 'dump'
 * group is taken from it.
 *
 * @return IMaintainableDatabase
 */
protected function backupDb() {
	if ( $this->forcedDb !== null ) {
		return $this->forcedDb;
	}

	$services = MediaWikiServices::getInstance();
	$this->lb = $services->getDBLoadBalancerFactory()->newMainLB();
	$conn = $this->lb->getMaintenanceConnectionRef( DB_REPLICA, 'dump' );

	// Reading out the big batch query can take a long time; ask the
	// server for a generous timeout so it does not disconnect us.
	$sessionOptions = [ 'connTimeout' => 3600 * 24 ];
	$conn->setSessionOptions( $sessionOptions );

	return $conn;
}
418 
/**
 * Force the dump to use the provided database connection for database
 * operations, wherever possible.
 *
 * The connection is handed to the parent class and also remembered in
 * $this->forcedDb, which backupDb() checks before opening a connection.
 *
 * @param IMaintainableDatabase $db
 */
public function setDB( IMaintainableDatabase $db ) {
	parent::setDB( $db );

	$this->forcedDb = $db;
}
429 
/**
 * Close every connection of the load balancer that backupDb() created,
 * if one was created.
 */
public function __destruct() {
	if ( !isset( $this->lb ) ) {
		return;
	}

	$this->lb->closeAll( __METHOD__ );
}
435 
/**
 * Get the database server to read from: the --server override when one
 * was given (any non-empty, truthy value), else the configured
 * $wgDBserver.
 *
 * @return string
 */
protected function backupServer() {
	global $wgDBserver;

	if ( $this->server ) {
		return $this->server;
	}

	return $wgDBserver;
}
441 
/**
 * Count one more processed page. Does not trigger a progress report.
 */
public function reportPage() {
	++$this->pageCount;
}
445 
/**
 * Count one more processed revision, then give report() the chance to
 * print a periodic progress line.
 */
public function revCount() {
	++$this->revCount;

	$this->report();
}
450 
/**
 * Print a progress report when one is due.
 *
 * A periodic call ($final false) reports when the revision count is a
 * multiple of the reporting interval. The final call reports only when
 * it is not, so the last periodic line is not printed twice.
 *
 * @param bool $final Whether this is the closing report of a dump
 */
public function report( $final = false ) {
	// --report=0 or a non-numeric value leaves the interval at 0, and a
	// modulo by zero throws DivisionByZeroError. A non-positive interval
	// is treated as "no periodic reports", leaving only the final one.
	$interval = (int)$this->reportingInterval;
	$atInterval = $interval > 0 && ( $this->revCount % $interval == 0 );

	if ( $final xor $atInterval ) {
		$this->showReport();
	}
}
456 
/**
 * Write one progress line through progress(): page and revision counts,
 * rates over the whole run ("all") and since the previous report
 * ("curr"), and an estimated time of completion.
 *
 * Does nothing unless $this->reporting is set. Values that cannot be
 * computed are passed as '-', exactly as before.
 */
public function showReport() {
	if ( !$this->reporting ) {
		return;
	}

	$now = wfTimestamp( TS_DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	$pageRate = '-';
	$revRate = '-';
	$etats = '-';
	if ( $deltaAll ) {
		$pageRate = $this->pageCount / $deltaAll;
		$revRate = $this->revCount / $deltaAll;

		// The ETA extrapolates from the share of work done. That share is
		// unknown or zero when no revision was counted yet or when the
		// maximum ID is missing (e.g. empty table); both cases used to
		// end in a division by zero.
		if ( $this->revCount > 0 && $this->maxCount > 0 ) {
			$portion = $this->revCount / $this->maxCount;
			$eta = $this->startTime + $deltaAll / $portion;
			$etats = wfTimestamp( TS_DB, intval( $eta ) );
		}
	}

	$pageRatePart = '-';
	$revRatePart = '-';
	if ( $deltaPart ) {
		$pageRatePart = $this->pageCountPart / $deltaPart;
		$revRatePart = $this->revCountPart / $deltaPart;
	}

	$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
			. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
		$now, $dbDomain, $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
		$this->maxCount
	) );

	$this->lastTime = $nowts;
	// Remember both counters. The page counter was previously never
	// stored, so the "curr" page rate was computed from the total page
	// count instead of the pages seen since the last report.
	$this->pageCountLast = $this->pageCount;
	$this->revCountLast = $this->revCount;
}
497 
/**
 * Write a line to the stderr stream opened by the constructor, unless
 * reporting is switched off.
 *
 * @param string $string Text to write; a newline is appended
 */
protected function progress( $string ) {
	if ( !$this->reporting ) {
		return;
	}

	fwrite( $this->stderr, $string . "\n" );
}
503 }
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
string null $server
null means use default
float $startTime
float $timeOfCheckpoint
array null $limitNamespaces
dump( $history, $text=WikiExporter::TEXT)
setDB(IMaintainableDatabase $db)
Force the dump to use the provided database connection for database operations, wherever possible.
int $reportingInterval
string[] $filterTypes
registerFilter( $name, $class)
progress( $string)
LoadBalancer $lb
string null $thisRevModel
string $lastName
string[] $outputTypes
__construct( $args=null)
execute()
Do the actual work.
IMaintainableDatabase null $forcedDb
The dependency-injected database to use.
DumpMultiWriter DumpOutput null $sink
Output filters.
bool $skipHeader
don't output <mediawiki> and <siteinfo>
bool $skipFooter
don't output </mediawiki>
finalSetup(SettingsBuilder $settingsBuilder=null)
Handle some last-minute setup here.
string[] null $pages
null means all pages
string null $thisRevFormat
initProgress( $history=WikiExporter::FULL)
Initialise starting time and maximum revision count.
string null $schemaVersion
null means use default
array false $openElement
processOptions()
Processes arguments and sets $this->$sink accordingly.
registerOutput( $name, $class)
bool $dumpUploadFileContents
ExportProgressFilter $egress
resource false $stderr
loadPlugin( $class, $file)
Load a plugin and register it.
report( $final=false)
MediaWiki exception.
Definition: MWException.php:29
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
array $orderedOptions
Used to read the options in the order they were passed.
hasOption( $name)
Checks to see if a particular option was set.
loadWithArgv( $argv)
Load params and arguments from a given array of command-line arguments.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Utility for loading settings files.
static schemaVersion()
Returns the default export schema version, as defined by the XmlDumpSchemaVersion setting.
static getCurrentWikiDbDomain()
Definition: WikiMap.php:293
static string[] $supportedSchemas
the schema versions supported for output @final
$wgDBserver
Config variable stub for the DBserver setting, for use by phpdoc and IDEs.
setSessionOptions(array $options)
Override database's default behavior.
Advanced database interface for IDatabase handles that include maintenance methods.
if( $line===false) $args
Definition: mcc.php:124
const DB_REPLICA
Definition: defines.php:26
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42