MediaWiki  master
BackupDumper.php
Go to the documentation of this file.
1 <?php
28 require_once __DIR__ . '/../Maintenance.php';
29 require_once __DIR__ . '/../../includes/export/WikiExporter.php';
30 
36 
41 abstract class BackupDumper extends Maintenance {
/** @var bool Whether progress reports are emitted; checked by showReport() and progress() */
public $reporting = true;
/** @var string[]|null Page names handed to pagesByName() in dump(); null means all pages */
public $pages;
/** @var bool Don't output <mediawiki> and <siteinfo> */
public $skipHeader = false;
/** @var bool Don't output </mediawiki> */
public $skipFooter = false;
/** @var int Start of the ID range passed to pagesByRange()/logsByRange(); 0 means unset */
public $startId = 0;
/** @var int End of the ID range passed to pagesByRange()/logsByRange(); 0 means unset */
public $endId = 0;
/** @var int Start of the revision ID range passed to revsByRange(); 0 means unset */
public $revStartId = 0;
/** @var int End of the revision ID range passed to revsByRange(); 0 means unset */
public $revEndId = 0;
/** @var bool Copied onto the exporter's dumpUploads flag in dump() */
public $dumpUploads = false;
/** @var bool Copied onto the exporter's dumpUploadFileContents flag in dump() */
public $dumpUploadFileContents = false;
/** @var bool Passed as the third argument of pagesByRange() in dump() */
public $orderRevs = false;
/** @var array|null Namespace restriction passed to the exporter factory in dump() */
public $limitNamespaces = [];
/** @var resource|false Stream opened on php://stderr; progress() writes to it */
public $stderr;

/** @var int Number of revisions between two periodic reports, see report() */
protected $reportingInterval = 100;
/** @var int Pages seen so far; incremented by reportPage() */
protected $pageCount = 0;
/** @var int Revisions seen so far; incremented by revCount() */
protected $revCount = 0;
/** @var string|null Schema version for the output; null means use default */
protected $schemaVersion;
/** @var string|null Value of the --server option; null means use default */
protected $server;
/** @var DumpMultiWriter|DumpOutput|null Output filters */
protected $sink;
/** @var float Time of the previous report, as returned by microtime( true ) */
protected $lastTime = 0;
/** @var int Page count at the previous report */
protected $pageCountLast = 0;
/** @var int Revision count at the previous report */
protected $revCountLast = 0;

/** @var string[] Output sink classes keyed by type name, see registerOutput() */
protected $outputTypes = [];
/** @var string[] Filter classes keyed by type name, see registerFilter() */
protected $filterTypes = [];

/** @var int|false Process ID from getmypid(), set in initProgress() and shown in reports */
protected $ID = 0;

/** @var float Time at which initProgress() ran, as returned by microtime( true ) */
protected $startTime;
/** @var int Pages processed since the previous report; computed in showReport() */
protected $pageCountPart;
/** @var int Revisions processed since the previous report; computed in showReport() */
protected $revCountPart;
/** @var mixed MAX(page_id) or MAX(rev_id) as fetched by initProgress(); used for the ETA */
protected $maxCount;
// The properties below are not referenced in this class's visible code;
// presumably they are shared state for subclasses (TODO confirm).
/** @var float */
protected $timeOfCheckpoint;
/** @var ExportProgressFilter */
protected $egress;
protected $buffer;
/** @var array|false */
protected $openElement;
protected $atStart;
/** @var string|null */
protected $thisRevModel;
/** @var string|null */
protected $thisRevFormat;
/** @var string */
protected $lastName;
protected $state;

/**
 * The dependency-injected database to use.
 *
 * @var IMaintainableDatabase|null Set by setDB(); null means connections are obtained on demand
 */
protected $forcedDb;

/** @var LoadBalancer Load balancer created by backupDb() and closed in __destruct() */
protected $lb;
134 
/**
 * Register the built-in output sinks and filters and declare the command line options.
 *
 * @param array|null $args Command line arguments. When non-empty they are loaded and
 *  processed right away, so that dump() can be called directly instead of execute().
 */
public function __construct( $args = null ) {
	parent::__construct();
	$this->stderr = fopen( "php://stderr", "wt" );

	// Built-in output sinks, keyed by the <type> accepted by --output
	$builtinOutputs = [
		'file' => DumpFileOutput::class,
		'gzip' => DumpGZipOutput::class,
		'bzip2' => DumpBZip2Output::class,
		'dbzip2' => DumpDBZip2Output::class,
		'lbzip2' => DumpLBZip2Output::class,
		'7zip' => Dump7ZipOutput::class,
	];
	foreach ( $builtinOutputs as $sinkName => $sinkClass ) {
		$this->registerOutput( $sinkName, $sinkClass );
	}

	// Built-in filters, keyed by the <type> accepted by --filter
	$builtinFilters = [
		'latest' => DumpLatestFilter::class,
		'notalk' => DumpNotalkFilter::class,
		'namespace' => DumpNamespaceFilter::class,
	];
	foreach ( $builtinFilters as $filterName => $filterClass ) {
		$this->registerFilter( $filterName, $filterClass );
	}

	// --plugin, --output and --filter may each be given several times
	$this->addOption( 'plugin', 'Load a dump plugin class. Specify as <class>[:<file>].',
		false, true, false, true );
	$this->addOption( 'output', 'Begin a filtered output stream; Specify as <type>:<file>. ' .
		'<type>s: file, gzip, bzip2, 7zip, dbzip2, lbzip2', false, true, 'o', true );
	$this->addOption( 'filter', 'Add a filter on an output branch. Specify as ' .
		'<type>[:<options>]. <types>s: latest, notalk, namespace', false, true, false, true );
	$this->addOption( 'report', 'Report position and speed after every n pages processed. ' .
		'Default: 100.', false, true );
	$this->addOption( 'server', 'Force reading from MySQL server', false, true );
	$this->addOption( '7ziplevel', '7zip compression level for all 7zip outputs. Used for ' .
		'-mx option to 7za command.', false, true );
	// The default schema version cannot be named here because configuration has not
	// been loaded when this constructor runs. finalSetup() re-declares this option
	// with the default included in its description.
	$this->addOption( 'schema-version', 'Schema version to use for output.', false, true );

	if ( !$args ) {
		return;
	}

	// Load and process the arguments now so that dump() can be called directly
	// instead of execute()
	$this->loadWithArgv( $args );
	$this->processOptions();
}
178 
/**
 * Handle some last-minute setup here.
 *
 * Re-declares the --schema-version option so that its description names the
 * configured default schema version, which is not yet known in the constructor.
 *
 * @param SettingsBuilder|null $settingsBuilder Source of the loaded configuration.
 *  When null, the generic option description from the constructor is kept.
 */
public function finalSetup( ?SettingsBuilder $settingsBuilder = null ) {
	parent::finalSetup( $settingsBuilder );

	// The parameter is optional, so it must not be dereferenced blindly: without a
	// builder there is no configuration to read the default schema version from.
	if ( $settingsBuilder === null ) {
		return;
	}

	$schemaVersion = $settingsBuilder->getConfig()->get( MainConfigNames::XmlDumpSchemaVersion );
	$this->addOption( 'schema-version', 'Schema version to use for output. ' .
		'Default: ' . $schemaVersion, false, true );
}
187 
/**
 * Make an output sink class available under a type name.
 *
 * processOptions() looks the name up when handling --output=<type>:<file>.
 * Registering an existing name replaces the previous class.
 *
 * @param string $name Type name used on the command line
 * @param string $class Name of the class to instantiate for that type
 */
public function registerOutput( $name, $class ) {
	$this->outputTypes[$name] = $class;
}
195 
/**
 * Make a filter class available under a type name.
 *
 * processOptions() looks the name up when handling --filter=<type>[:<options>].
 * Registering an existing name replaces the previous class.
 *
 * @param string $name Type name used on the command line
 * @param string $class Name of the class to instantiate for that type
 */
public function registerFilter( $name, $class ) {
	$this->filterTypes[$name] = $class;
}
203 
/**
 * Load a plugin and register it.
 *
 * The plugin class must provide a register() method; it is called with this
 * dumper so the plugin can add its own output sinks or filters.
 *
 * @param string $class Name of the plugin class
 * @param string $file File to include before registering; an empty value skips the include
 */
public function loadPlugin( $class, $file ) {
	if ( $file != '' ) {
		require_once $file;
	}
	call_user_func( [ $class, 'register' ], $this );
}
218 
/**
 * Processes arguments and sets $this->sink accordingly.
 *
 * The options are walked in the order they were given on the command line:
 * each --output starts a new sink, and each following --filter wraps the sink
 * that is current at that point. If several sinks result they are combined in
 * a DumpMultiWriter; if none was requested a plain DumpOutput is used.
 *
 * Invalid --output, --filter or --schema-version values end the script via
 * fatalError().
 */
protected function processOptions() {
	$sink = null;
	$sinks = [];

	$this->schemaVersion = WikiExporter::schemaVersion();

	$options = $this->orderedOptions;
	foreach ( $options as [ $opt, $param ] ) {
		switch ( $opt ) {
			case 'plugin':
				// <class>[:<file>]; a missing file part means nothing is included
				$val = explode( ':', $param, 2 );
				$this->loadPlugin( $val[0], $val[1] ?? '' );

				break;
			case 'output':
				$split = explode( ':', $param, 2 );
				if ( count( $split ) !== 2 ) {
					$this->fatalError( 'Invalid output parameter' );
				}
				[ $type, $file ] = $split;
				if ( $sink !== null ) {
					// A new --output closes the previous output branch
					$sinks[] = $sink;
				}
				if ( !isset( $this->outputTypes[$type] ) ) {
					$this->fatalError( "Unrecognized output sink type '$type'" );
				}
				$class = $this->outputTypes[$type];
				if ( $type === "7zip" ) {
					$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
				} else {
					$sink = new $class( $file );
				}

				break;
			case 'filter':
				if ( $sink === null ) {
					// A filter given before any --output wraps the default output
					$sink = new DumpOutput();
				}

				$split = explode( ':', $param, 2 );
				$key = $split[0];

				if ( !isset( $this->filterTypes[$key] ) ) {
					$this->fatalError( "Unrecognized filter type '$key'" );
				}

				$type = $this->filterTypes[$key];

				if ( count( $split ) === 2 ) {
					$filter = new $type( $sink, $split[1] );
				} else {
					$filter = new $type( $sink );
				}

				// Drop the old variable before rebinding it. Presumably this guards
				// against filter constructors that take the sink by reference, where a
				// plain assignment would overwrite the wrapped sink (TODO confirm).
				unset( $sink );
				$sink = $filter;

				break;
			case 'schema-version':
				// Strict comparison: a loose in_array() compares numeric strings by
				// value, so e.g. "0.1" would match a supported "0.10" and an
				// unsupported version string would be passed on to the exporter.
				if ( !in_array( $param, XmlDumpWriter::$supportedSchemas, true ) ) {
					$this->fatalError(
						"Unsupported schema version $param. Supported versions: " .
						implode( ', ', XmlDumpWriter::$supportedSchemas )
					);
				}
				$this->schemaVersion = $param;
				break;
		}
	}

	if ( $this->hasOption( 'report' ) ) {
		$this->reportingInterval = intval( $this->getOption( 'report' ) );
	}

	if ( $this->hasOption( 'server' ) ) {
		$this->server = $this->getOption( 'server' );
	}

	if ( $sink === null ) {
		$sink = new DumpOutput();
	}
	$sinks[] = $sink;

	if ( count( $sinks ) > 1 ) {
		$this->sink = new DumpMultiWriter( $sinks );
	} else {
		$this->sink = $sink;
	}
}
317 
/**
 * Run the dump: build an exporter, attach the configured sink and write the
 * selected logs or pages, then emit a final progress report.
 *
 * @param int $history WikiExporter history constant; the WikiExporter::LOGS bit
 *  selects a log dump instead of a page dump
 * @param int $text Passed through to the exporter; defaults to WikiExporter::TEXT
 */
public function dump( $history, $text = WikiExporter::TEXT ) {
	// Notice messages would foul up the XML output even if they are relatively
	// harmless, so send displayed errors to stderr instead.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $history );

	$db = $this->backupDb();
	$exporterFactory = $this->getServiceContainer()->getWikiExporterFactory();
	$exporter = $exporterFactory->getWikiExporter( $db, $history, $text, $this->limitNamespaces );
	$exporter->setSchemaVersion( $this->schemaVersion );
	$exporter->dumpUploads = $this->dumpUploads;
	$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

	// Route the output through a progress filter that reports back to this dumper
	$exporter->setOutputSink( new ExportProgressFilter( $this->sink, $this ) );

	if ( !$this->skipHeader ) {
		$exporter->openStream();
	}

	$hasIdRange = $this->startId || $this->endId;
	if ( $history & WikiExporter::LOGS ) {
		// Log item dumps: all or by range
		if ( $hasIdRange ) {
			$exporter->logsByRange( $this->startId, $this->endId );
		} else {
			$exporter->allLogs();
		}
	} elseif ( $this->pages !== null ) {
		// Dump of specific pages
		$exporter->pagesByName( $this->pages );
	} elseif ( $hasIdRange ) {
		// Page dump by page ID range
		$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
	} elseif ( $this->revStartId || $this->revEndId ) {
		// Page dump by revision ID range
		$exporter->revsByRange( $this->revStartId, $this->revEndId );
	} else {
		$exporter->allPages();
	}

	if ( !$this->skipFooter ) {
		$exporter->closeStream();
	}

	$this->report( true );
}
372 
/**
 * Initialise starting time and maximum revision count.
 *
 * The maximum is MAX(page_id) of the page table for WikiExporter::CURRENT and
 * MAX(rev_id) of the revision table otherwise. Also records the process ID
 * shown in progress reports.
 *
 * @param int $history WikiExporter history constant
 */
public function initProgress( $history = WikiExporter::FULL ) {
	if ( $history == WikiExporter::CURRENT ) {
		$table = 'page';
		$field = 'page_id';
	} else {
		$table = 'revision';
		$field = 'rev_id';
	}

	// Prefer the injected connection; otherwise ask for a replica in the 'dump' group
	$dbr = $this->forcedDb ?? $this->getDB( DB_REPLICA, [ 'dump' ] );

	$this->maxCount = $dbr->newSelectQueryBuilder()
		->select( "MAX($field)" )
		->from( $table )
		->caller( __METHOD__ )
		->fetchField();

	$started = microtime( true );
	$this->startTime = $started;
	$this->lastTime = $started;
	$this->ID = getmypid();
}
395 
/**
 * Get the database connection the dump reads from.
 *
 * Returns the connection injected through setDB() if there is one. Otherwise a
 * new main load balancer is created, kept in $this->lb, and a replica
 * connection in the 'dump' group is taken from it.
 *
 * @return IMaintainableDatabase
 */
protected function backupDb() {
	if ( $this->forcedDb === null ) {
		$this->lb = $this->getServiceContainer()->getDBLoadBalancerFactory()->newMainLB();
		$conn = $this->lb->getMaintenanceConnectionRef( DB_REPLICA, 'dump' );

		// Discourage the server from disconnecting us if the big batch query
		// takes a long time to read out.
		$conn->setSessionOptions( [ 'connTimeout' => 3600 * 24 ] );

		return $conn;
	}

	return $this->forcedDb;
}
417 
/**
 * Force the dump to use the provided database connection for database
 * operations, wherever possible.
 *
 * The connection is passed on to the parent class and also remembered in
 * $this->forcedDb, which initProgress() and backupDb() check first.
 *
 * @param IMaintainableDatabase $db
 */
public function setDB( IMaintainableDatabase $db ) {
	parent::setDB( $db );
	$this->forcedDb = $db;
}
428 
/**
 * Close all connections of the load balancer created by backupDb(), if any.
 */
public function __destruct() {
	if ( $this->lb !== null ) {
		$this->lb->closeAll( __METHOD__ );
	}
}
434 
/**
 * Get the database server to read from.
 *
 * @return string The server set through --server if it is non-empty, otherwise
 *  the value of the global $wgDBserver
 */
protected function backupServer() {
	if ( $this->server ) {
		return $this->server;
	}

	global $wgDBserver;

	return $wgDBserver;
}
440 
/**
 * Count one more processed page. Does not trigger a report by itself.
 */
public function reportPage() {
	++$this->pageCount;
}
444 
/**
 * Count one more processed revision and let report() decide whether a
 * progress report is due.
 */
public function revCount() {
	++$this->revCount;
	$this->report();
}
449 
/**
 * Show a progress report if one is due.
 *
 * A periodic report is due whenever the revision count is a multiple of the
 * reporting interval. A final report is shown unless the count sits exactly on
 * such a multiple, in which case a periodic report has just covered it.
 *
 * A reporting interval of zero or less (e.g. --report=0, or a non-numeric
 * value that intval() turned into 0) disables periodic reports instead of
 * raising a DivisionByZeroError in the modulo below; the final report is
 * still shown.
 *
 * @param bool $final True for the report at the end of the dump
 */
public function report( $final = false ) {
	$interval = (int)$this->reportingInterval;
	$atInterval = $interval > 0 && $this->revCount % $interval === 0;

	if ( $final xor $atInterval ) {
		$this->showReport();
	}
}
455 
/**
 * Write one progress line: page and revision counts, overall and current
 * rates, and the estimated time of completion.
 *
 * "all" rates cover the time since initProgress(); "curr" rates cover the
 * time since the previous report. Does nothing when reporting is disabled.
 */
public function showReport() {
	if ( !$this->reporting ) {
		return;
	}

	$now = wfTimestamp( TS_DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	if ( $deltaAll ) {
		$pageRate = $this->pageCount / $deltaAll;
		$revRate = $this->revCount / $deltaAll;
	} else {
		$pageRate = '-';
		$revRate = '-';
	}

	// The ETA extrapolates from the share of revisions done so far. It can only
	// be computed once at least one revision was counted and a positive maximum
	// is known; otherwise either division below would be a division by zero.
	$etats = '-';
	if ( $deltaAll && $this->revCount > 0 && $this->maxCount > 0 ) {
		$portion = $this->revCount / $this->maxCount;
		$eta = $this->startTime + $deltaAll / $portion;
		$etats = wfTimestamp( TS_DB, intval( $eta ) );
	}

	if ( $deltaPart ) {
		$pageRatePart = $this->pageCountPart / $deltaPart;
		$revRatePart = $this->revCountPart / $deltaPart;
	} else {
		$pageRatePart = '-';
		$revRatePart = '-';
	}

	$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
			. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
		$now, $dbDomain, $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
		$this->maxCount
	) );

	// Remember where this report ended so the next "curr" rates cover only the
	// interval since now. The page count has to be recorded as well, otherwise
	// the current page rate is computed from the total page count.
	$this->lastTime = $nowts;
	$this->pageCountLast = $this->pageCount;
	$this->revCountLast = $this->revCount;
}
496 
/**
 * Write a line to the stderr stream, unless reporting is disabled.
 *
 * @param string $string Text to write; a newline is appended
 */
protected function progress( $string ) {
	if ( !$this->reporting ) {
		return;
	}

	fwrite( $this->stderr, $string . "\n" );
}
502 }
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
string|null $server
null means use default
float $startTime
float $timeOfCheckpoint
array|null $limitNamespaces
dump( $history, $text=WikiExporter::TEXT)
setDB(IMaintainableDatabase $db)
Force the dump to use the provided database connection for database operations, wherever possible.
int $reportingInterval
string[] $filterTypes
registerFilter( $name, $class)
progress( $string)
LoadBalancer $lb
string|null $thisRevModel
string $lastName
string[] $outputTypes
__construct( $args=null)
IMaintainableDatabase|null $forcedDb
The dependency-injected database to use.
DumpMultiWriter|DumpOutput|null $sink
Output filters.
bool $skipHeader
don't output <mediawiki> and <siteinfo>
bool $skipFooter
don't output </mediawiki>
finalSetup(SettingsBuilder $settingsBuilder=null)
Handle some last-minute setup here.
string[]|null $pages
null means all pages
string|null $thisRevFormat
initProgress( $history=WikiExporter::FULL)
Initialise starting time and maximum revision count.
string|null $schemaVersion
null means use default
array|false $openElement
processOptions()
Processes arguments and sets $this->sink accordingly.
registerOutput( $name, $class)
bool $dumpUploadFileContents
ExportProgressFilter $egress
resource|false $stderr
loadPlugin( $class, $file)
Load a plugin and register it.
report( $final=false)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
array $orderedOptions
Used to read the options in the order they were passed.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
loadWithArgv( $argv)
Load params and arguments from a given array of command-line arguments.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Builder class for constructing a Config object from a set of sources during bootstrap.
Tools for dealing with other locally-hosted wikis.
Definition: WikiMap.php:31
static schemaVersion()
Returns the default export schema version, as defined by the XmlDumpSchemaVersion setting.
static string[] $supportedSchemas
the schema versions supported for output @final
$wgDBserver
Config variable stub for the DBserver setting, for use by phpdoc and IDEs.
Advanced database interface for IDatabase handles that include maintenance methods.
const DB_REPLICA
Definition: defines.php:26
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42