MediaWiki  1.30.0
backup.inc
Go to the documentation of this file.
1 <?php
27 require_once __DIR__ . '/Maintenance.php';
28 require_once __DIR__ . '/../includes/export/DumpFilter.php';
29 
32 
36 class BackupDumper extends Maintenance {
/** @var bool When false, progress() and showReport() emit nothing */
public $reporting = true;
/** @var mixed Specific pages handed to WikiExporter::pagesByName(); null means all pages */
public $pages = null;
/** @var bool Don't output <mediawiki> and <siteinfo> */
public $skipHeader = false;
/** @var bool Don't output </mediawiki> */
public $skipFooter = false;
/** @var int Lower bound of the page (or log) ID range; 0 means unset */
public $startId = 0;
/** @var int Upper bound of the page (or log) ID range; 0 means unset */
public $endId = 0;
/** @var int Lower bound of the revision ID range; 0 means unset */
public $revStartId = 0;
/** @var int Upper bound of the revision ID range; 0 means unset */
public $revEndId = 0;
/** @var bool Copied onto the WikiExporter in dump() */
public $dumpUploads = false;
/** @var bool Copied onto the WikiExporter in dump() */
public $dumpUploadFileContents = false;
/** @var bool Passed as the third argument of WikiExporter::pagesByRange() */
public $orderRevs = false;

/** @var int A report is shown whenever revCount is a multiple of this (see --report) */
protected $reportingInterval = 100;
/** @var int Pages counted so far via reportPage() */
protected $pageCount = 0;
/** @var int Revisions counted so far via revCount() */
protected $revCount = 0;
/** @var string|null Server given with --server; null means use default */
protected $server = null;
/** @var mixed Output filters: the sink (or multi-writer) built by processOptions() */
protected $sink = null;
/** @var float microtime( true ) of the previous report */
protected $lastTime = 0;
/** @var int Value of pageCount at the previous report */
protected $pageCountLast = 0;
/** @var int Value of revCount at the previous report */
protected $revCountLast = 0;

/** @var array Map of output type name => class name, see registerOutput() */
protected $outputTypes = [];
/** @var array Map of filter type name => class name, see registerFilter() */
protected $filterTypes = [];

/** @var int Process ID, set by initProgress() and shown in reports */
protected $ID = 0;

/** @var IDatabase|null The dependency-injected database to use, see setDB() */
protected $forcedDb = null;

/** @var LoadBalancer Load balancer created by backupDb(); closed in __destruct() */
protected $lb;

// @todo Unused?
/** @var bool Include rev_text_id instead of text; for 2-pass dump */
private $stubText = false;

// The properties below used to be created on first assignment. They are
// declared public so that their visibility matches what dynamic creation gave.

/** @var resource|bool Result of fopen() on php://stderr; progress() writes to it */
public $stderr;
/** @var mixed MAX(page_id) or MAX(rev_id) as selected by initProgress() */
public $maxCount;
/** @var float microtime( true ) captured by initProgress() */
public $startTime;
/** @var int Pages counted since the previous report; computed in showReport() */
public $pageCountPart;
/** @var int Revisions counted since the previous report; computed in showReport() */
public $revCountPart;
77 
/**
 * Set up the dumper: open stderr for progress output, register the
 * built-in output sinks and filters, and declare the shared options.
 *
 * @param array|null $args Command-line arguments. When non-empty they are
 *   loaded and processed right away, so that dump() can be called directly
 *   instead of execute().
 */
public function __construct( $args = null ) {
	parent::__construct();
	$this->stderr = fopen( "php://stderr", "wt" );

	// Built-in output plugins, registered in this order
	$builtinOutputs = [
		'file' => 'DumpFileOutput',
		'gzip' => 'DumpGZipOutput',
		'bzip2' => 'DumpBZip2Output',
		'dbzip2' => 'DumpDBZip2Output',
		'7zip' => 'Dump7ZipOutput',
	];
	foreach ( $builtinOutputs as $outputName => $outputClass ) {
		$this->registerOutput( $outputName, $outputClass );
	}

	// Built-in filter plugins
	$builtinFilters = [
		'latest' => 'DumpLatestFilter',
		'notalk' => 'DumpNotalkFilter',
		'namespace' => 'DumpNamespaceFilter',
	];
	foreach ( $builtinFilters as $filterName => $filterClass ) {
		$this->registerFilter( $filterName, $filterClass );
	}

	// plugin, output and filter can be specified multiple times
	$this->addOption(
		'plugin',
		'Load a dump plugin class. Specify as <class>[:<file>].',
		false, true, false, true
	);
	$this->addOption(
		'output',
		'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2',
		false, true, false, true
	);
	$this->addOption(
		'filter',
		'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace',
		false, true, false, true
	);

	// Single-occurrence options
	$this->addOption(
		'report',
		'Report position and speed after every n pages processed. ' .
			'Default: 100.',
		false, true
	);
	$this->addOption( 'server', 'Force reading from MySQL server', false, true );
	$this->addOption(
		'7ziplevel',
		'7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.',
		false, true
	);

	if ( !$args ) {
		return;
	}

	$this->loadWithArgv( $args );
	$this->processOptions();
}
116 
/**
 * Make an output sink class available under a type name for --output.
 *
 * @param string $name Type name used on the command line
 * @param string $class Name of the class instantiated for that type
 */
public function registerOutput( $name, $class ) {
	$this->outputTypes[$name] = $class;
}
124 
/**
 * Make a filter class available under a type name for --filter.
 *
 * @param string $name Type name used on the command line
 * @param string $class Name of the class instantiated for that type
 */
public function registerFilter( $name, $class ) {
	$this->filterTypes[$name] = $class;
}
132 
/**
 * Load a plugin and register it.
 *
 * The plugin class's static register() method is called with this dumper
 * as its only argument.
 *
 * @param string $class Name of the plugin class
 * @param string $file File to require first; skipped when it compares
 *   loosely equal to the empty string
 */
public function loadPlugin( $class, $file ) {
	$needsInclude = ( $file != '' );
	if ( $needsInclude ) {
		require_once $file;
	}

	call_user_func_array( [ $class, 'register' ], [ $this ] );
}
147 
/**
 * Placeholder entry point: this class does no work by itself.
 *
 * @throws MWException Always; subclasses have to provide their own execute()
 */
public function execute() {
	throw new MWException( 'execute() must be overridden in subclasses' );
}
151 
/**
 * Processes arguments and sets $this->sink accordingly.
 *
 * The plugin, output and filter options are handled in the order in which
 * they were passed. Each --output starts a new output branch, and each
 * --filter wraps the branch that is currently being built. If more than one
 * branch results, they are combined into a DumpMultiWriter.
 *
 * Also applies --report and --server.
 */
public function processOptions() {
	$sink = null;
	$sinks = [];

	// Read from the ordered option list kept by Maintenance. The loop used to
	// iterate over a local $options variable that was never defined.
	foreach ( $this->orderedOptions as $arg ) {
		$opt = $arg[0];
		$param = $arg[1];

		switch ( $opt ) {
			case 'plugin':
				// <class>[:<file>]
				$val = explode( ':', $param );

				if ( count( $val ) === 1 ) {
					$this->loadPlugin( $val[0], '' );
				} elseif ( count( $val ) === 2 ) {
					$this->loadPlugin( $val[0], $val[1] );
				} else {
					$this->fatalError( 'Invalid plugin parameter' );
					return;
				}

				break;
			case 'output':
				// <type>:<file>; only the first colon separates, so the file
				// name may itself contain colons
				$split = explode( ':', $param, 2 );
				if ( count( $split ) !== 2 ) {
					$this->fatalError( 'Invalid output parameter' );
				}
				list( $type, $file ) = $split;
				// A new output closes the branch built so far
				if ( !is_null( $sink ) ) {
					$sinks[] = $sink;
				}
				if ( !isset( $this->outputTypes[$type] ) ) {
					$this->fatalError( "Unrecognized output sink type '$type'" );
				}
				$class = $this->outputTypes[$type];
				if ( $type === "7zip" ) {
					$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
				} else {
					$sink = new $class( $file );
				}

				break;
			case 'filter':
				// A filter given before any output wraps a plain DumpOutput
				if ( is_null( $sink ) ) {
					$sink = new DumpOutput();
				}

				// <type>[:<options>]
				$split = explode( ':', $param );
				$key = $split[0];

				if ( !isset( $this->filterTypes[$key] ) ) {
					$this->fatalError( "Unrecognized filter type '$key'" );
				}

				$type = $this->filterTypes[$key];

				if ( count( $split ) === 1 ) {
					$filter = new $type( $sink );
				} elseif ( count( $split ) === 2 ) {
					$filter = new $type( $sink, $split[1] );
				} else {
					$this->fatalError( 'Invalid filter parameter' );
				}

				// references are lame in php...
				// NOTE(review): presumably the filter constructors take $sink by
				// reference, so the variable is unset before being reused.
				unset( $sink );
				$sink = $filter;

				break;
		}
	}

	if ( $this->hasOption( 'report' ) ) {
		$this->reportingInterval = intval( $this->getOption( 'report' ) );
	}

	if ( $this->hasOption( 'server' ) ) {
		$this->server = $this->getOption( 'server' );
	}

	// Without any output or filter option, fall back to a plain DumpOutput
	if ( is_null( $sink ) ) {
		$sink = new DumpOutput();
	}
	$sinks[] = $sink;

	if ( count( $sinks ) > 1 ) {
		$this->sink = new DumpMultiWriter( $sinks );
	} else {
		$this->sink = $sink;
	}
}
247 
/**
 * Run the export and stream it into the configured sink.
 *
 * Depending on $history and on the range/page properties, this dumps logs
 * (all or by ID range), all pages, pages by ID range, revisions by ID range,
 * or a specific set of pages.
 *
 * @param int $history WikiExporter history constant; tested against
 *   WikiExporter::LOGS as a bit flag
 * @param int $text Passed through to the WikiExporter constructor
 */
public function dump( $history, $text = WikiExporter::TEXT ) {
	// Notice messages will foul up your XML output even if they're
	// relatively harmless, so send them to stderr when they are displayed.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $history );

	$exporter = new WikiExporter( $this->backupDb(), $history, WikiExporter::STREAM, $text );
	$exporter->dumpUploads = $this->dumpUploads;
	$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

	// Wrap the sink so that pages and revisions are counted as they are written
	$wrapper = new ExportProgressFilter( $this->sink, $this );
	$exporter->setOutputSink( $wrapper );

	if ( !$this->skipHeader ) {
		$exporter->openStream();
	}

	$hasIdRange = ( $this->startId || $this->endId );

	if ( $history & WikiExporter::LOGS ) {
		// Log item dumps: all or by range
		if ( $hasIdRange ) {
			$exporter->logsByRange( $this->startId, $this->endId );
		} else {
			$exporter->allLogs();
		}
	} elseif ( $this->pages !== null ) {
		// Dump of specific pages
		$exporter->pagesByName( $this->pages );
	} elseif ( $hasIdRange ) {
		// Page dumps by page ID range
		$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
	} elseif ( $this->revStartId || $this->revEndId ) {
		// Page dumps by revision ID range
		$exporter->revsByRange( $this->revStartId, $this->revEndId );
	} else {
		$exporter->allPages();
	}

	if ( !$this->skipFooter ) {
		$exporter->closeStream();
	}

	$this->report( true );
}
295 
/**
 * Initialise starting time and maximum revision count.
 *
 * The maximum is MAX(page_id) for a current-revision dump and MAX(rev_id)
 * otherwise. It is read from the injected database when one was set with
 * setDB(), and from a replica connection otherwise.
 *
 * @param int $history WikiExporter::CURRENT or WikiExporter::FULL
 */
public function initProgress( $history = WikiExporter::FULL ) {
	$currentOnly = ( $history == WikiExporter::CURRENT );
	$table = $currentOnly ? 'page' : 'revision';
	$field = $currentOnly ? 'page_id' : 'rev_id';

	// Start from the forced DB. $dbr used to be left undefined whenever a
	// database had been injected, so selectField() was called on nothing.
	$dbr = $this->forcedDb;
	if ( $dbr === null ) {
		$dbr = wfGetDB( DB_REPLICA );
	}

	$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );
	$this->startTime = microtime( true );
	$this->lastTime = $this->startTime;
	$this->ID = getmypid();
}
315 
/**
 * Get the database connection to read the dump from.
 *
 * Returns the injected database if setDB() provided one. Otherwise a new
 * main load balancer is created (and kept in $this->lb) and a replica
 * connection from the 'dump' group is returned.
 *
 * @return IDatabase
 */
public function backupDb() {
	$forced = $this->forcedDb;
	if ( $forced !== null ) {
		return $forced;
	}

	$loadBalancer = wfGetLBFactory()->newMainLB();
	$this->lb = $loadBalancer;
	$connection = $loadBalancer->getConnection( DB_REPLICA, 'dump' );

	// Discourage the server from disconnecting us if it takes a long time
	// to read out the big ol' batch query: ask for a 24 hour timeout.
	$oneDayInSeconds = 24 * 3600;
	$connection->setSessionOptions( [ 'connTimeout' => $oneDayInSeconds ] );

	return $connection;
}
336 
/**
 * Force the dump to use the provided database connection for database
 * operations, wherever possible.
 *
 * @param IDatabase|null $db Connection to use; null removes the override
 */
public function setDB( IDatabase $db = null ) {
	// Remember it for initProgress() and backupDb() as well
	$this->forcedDb = $db;
	parent::setDB( $db );
}
348 
/**
 * Close all connections of the load balancer created by backupDb(), if any.
 */
public function __destruct() {
	if ( !isset( $this->lb ) ) {
		return;
	}

	$this->lb->closeAll();
}
354 
/**
 * Get the database server to read from.
 *
 * @return string The server set with --server when it is non-empty,
 *   otherwise the configured $wgDBserver
 */
public function backupServer() {
	// Without this declaration $wgDBserver is an undefined local variable
	// and the fallback always yields null.
	global $wgDBserver;

	return $this->server
		? $this->server
		: $wgDBserver;
}
362 
/**
 * Count one more page. Called by ExportProgressFilter when a page is closed.
 */
public function reportPage() {
	++$this->pageCount;
}
366 
/**
 * Count one more revision and show a progress report when one is due.
 * Called by ExportProgressFilter for every revision written.
 */
public function revCount() {
	++$this->revCount;
	$this->report();
}
371 
/**
 * Show a progress report if one is due.
 *
 * A periodic report is due whenever the revision count is a multiple of the
 * reporting interval. The final report is shown only when no periodic report
 * was due at the same count, hence the xor.
 *
 * An interval of 0 (e.g. --report=0 or a non-numeric value) disables the
 * periodic reports instead of triggering a modulo by zero; the final report
 * is still shown.
 *
 * @param bool $final Whether this is the report at the end of the dump
 */
public function report( $final = false ) {
	$periodicDue = $this->reportingInterval != 0
		&& ( $this->revCount % $this->reportingInterval == 0 );

	if ( $final xor $periodicDue ) {
		$this->showReport();
	}
}
377 
/**
 * Write one progress line: page and revision counts, overall and current
 * rates, and the estimated time of completion.
 *
 * Does nothing when reporting is disabled. Rates and the ETA that cannot be
 * computed are passed to sprintf() as '-'.
 */
public function showReport() {
	if ( !$this->reporting ) {
		return;
	}

	$now = wfTimestamp( TS_DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	$pageRate = '-';
	$revRate = '-';
	$etats = '-';
	if ( $deltaAll ) {
		$pageRate = $this->pageCount / $deltaAll;
		$revRate = $this->revCount / $deltaAll;

		// The ETA extrapolates from the fraction of revisions done so far.
		// With no revisions yet, or an empty table (no maximum), that
		// fraction is zero or undefined and dividing by it would fail.
		if ( $this->revCount > 0 && $this->maxCount > 0 ) {
			$portion = $this->revCount / $this->maxCount;
			$eta = $this->startTime + $deltaAll / $portion;
			$etats = wfTimestamp( TS_DB, intval( $eta ) );
		}
	}

	$pageRatePart = '-';
	$revRatePart = '-';
	if ( $deltaPart ) {
		$pageRatePart = $this->pageCountPart / $deltaPart;
		$revRatePart = $this->revCountPart / $deltaPart;
	}

	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
			. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
		$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
		$this->maxCount
	) );

	// Remember where this report ended so that the next one can compute the
	// "curr" rates. The page counter used to be left out here, which made the
	// current page rate use the total page count.
	$this->lastTime = $nowts;
	$this->pageCountLast = $this->pageCount;
	$this->revCountLast = $this->revCount;
}
416 
/**
 * Write a line to the stderr handle opened in the constructor, unless
 * reporting is disabled.
 *
 * @param string $string Text to write; a newline is appended
 */
public function progress( $string ) {
	if ( !$this->reporting ) {
		return;
	}

	fwrite( $this->stderr, $string . "\n" );
}
422 
/**
 * Report an error through Maintenance::error() with a die value of 1.
 *
 * @param string $msg Message; a newline is appended
 */
public function fatalError( $msg ) {
	$line = $msg . "\n";
	$this->error( $line, 1 );
}
426 }
427 
/**
 * Dump filter that reports progress while the export is being written.
 *
 * Each call is handed to the parent implementation first; afterwards the
 * progress tracker is told that a page was closed or a revision was written.
 *
 * The class declaration line was missing in front of these methods, which
 * left them and the closing brace outside of any class.
 */
class ExportProgressFilter extends DumpFilter {
	/**
	 * @param DumpOutput &$sink Sink passed on to the parent constructor
	 * @param BackupDumper &$progress Object whose reportPage() and revCount()
	 *   methods are called
	 */
	function __construct( &$sink, &$progress ) {
		parent::__construct( $sink );
		$this->progress = $progress;
	}

	/**
	 * Write the page closing text, then count the page.
	 *
	 * @param string $string Passed unchanged to the parent implementation
	 */
	function writeClosePage( $string ) {
		parent::writeClosePage( $string );
		$this->progress->reportPage();
	}

	/**
	 * Write a revision, then count it.
	 *
	 * @param object $rev Passed unchanged to the parent implementation
	 * @param string $string Passed unchanged to the parent implementation
	 */
	function writeRevision( $rev, $string ) {
		parent::writeRevision( $rev, $string );
		$this->progress->revCount();
	}
}
BackupDumper\registerOutput
registerOutput( $name, $class)
Definition: backup.inc:121
BackupDumper\$endId
$endId
Definition: backup.inc:42
BackupDumper\revCount
revCount()
Definition: backup.inc:367
BackupDumper\backupDb
backupDb()
Definition: backup.inc:322
$wgDBserver
$wgDBserver
Database host name or IP address.
Definition: DefaultSettings.php:1772
BackupDumper\fatalError
fatalError( $msg)
Definition: backup.inc:423
$opt
$opt
Definition: postprocess-phan.php:115
WikiExporter\CURRENT
const CURRENT
Definition: WikiExporter.php:50
captcha-old.count
count
Definition: captcha-old.py:249
BackupDumper\$revEndId
$revEndId
Definition: backup.inc:44
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:2040
BackupDumper\backupServer
backupServer()
Definition: backup.inc:355
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
BackupDumper\__destruct
__destruct()
Definition: backup.inc:349
BackupDumper\$revCountLast
$revCountLast
Definition: backup.inc:56
ExportProgressFilter\writeRevision
writeRevision( $rev, $string)
Definition: backup.inc:439
BackupDumper\$pageCountLast
$pageCountLast
Definition: backup.inc:55
BackupDumper\$revCount
$revCount
Definition: backup.inc:51
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
BackupDumper\$sink
$sink
Definition: backup.inc:53
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: maintenance.txt:39
BackupDumper\$lb
LoadBalancer $lb
Definition: backup.inc:73
BackupDumper\$skipFooter
$skipFooter
Definition: backup.inc:40
BackupDumper\$orderRevs
$orderRevs
Definition: backup.inc:47
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
BackupDumper\$forcedDb
IDatabase null $forcedDb
The dependency-injected database to use.
Definition: backup.inc:70
pages
The ContentHandler facility adds support for arbitrary content types on wiki pages
Definition: contenthandler.txt:1
Maintenance\loadWithArgv
loadWithArgv( $argv)
Load params and arguments from a given array of command-line arguments.
Definition: Maintenance.php:763
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:40
DumpMultiWriter
Definition: DumpMultiWriter.php:29
BackupDumper\$skipHeader
$skipHeader
Definition: backup.inc:39
BackupDumper\$reporting
$reporting
Definition: backup.inc:37
MWException
MediaWiki exception.
Definition: MWException.php:26
BackupDumper\showReport
showReport()
Definition: backup.inc:378
WikiExporter\TEXT
const TEXT
Definition: WikiExporter.php:58
BackupDumper\$stubText
$stubText
Definition: backup.inc:76
ExportProgressFilter\writeClosePage
writeClosePage( $string)
Definition: backup.inc:434
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2856
BackupDumper\setDB
setDB(IDatabase $db=null)
Force the dump to use the provided database connection for database operations, wherever possible.
Definition: backup.inc:344
BackupDumper\$startId
$startId
Definition: backup.inc:41
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:215
Maintenance\$orderedOptions
array $orderedOptions
Used to read the options in the order they were passed.
Definition: Maintenance.php:149
BackupDumper\loadPlugin
loadPlugin( $class, $file)
Load a plugin and register it.
Definition: backup.inc:140
BackupDumper\$dumpUploadFileContents
$dumpUploadFileContents
Definition: backup.inc:46
global
When a variable name is used in a function, it is silently declared as a new local, masking the global.
Definition: design.txt:93
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
DumpOutput
Definition: DumpOutput.php:29
WikiExporter
Definition: WikiExporter.php:36
list
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition: deferred.txt:11
BackupDumper\$ID
$ID
Definition: backup.inc:61
Wikimedia\Rdbms\LoadBalancer
Database connection, tracking, load balancing, and transaction manager for a cluster.
Definition: LoadBalancer.php:41
BackupDumper\initProgress
initProgress( $history=WikiExporter::FULL)
Initialise starting time and maximum revision count.
Definition: backup.inc:302
wfWikiID
wfWikiID()
Get an ASCII string identifying this wiki This is used as a prefix in memcached keys.
Definition: GlobalFunctions.php:2807
BackupDumper\report
report( $final=false)
Definition: backup.inc:372
BackupDumper\$outputTypes
$outputTypes
Definition: backup.inc:58
ExportProgressFilter\__construct
__construct(&$sink, &$progress)
Definition: backup.inc:429
BackupDumper\$reportingInterval
$reportingInterval
Definition: backup.inc:49
BackupDumper\$pages
$pages
Definition: backup.inc:38
WikiExporter\STREAM
const STREAM
Definition: WikiExporter.php:56
WikiExporter\FULL
const FULL
Definition: WikiExporter.php:49
BackupDumper\$dumpUploads
$dumpUploads
Definition: backup.inc:45
BackupDumper\__construct
__construct( $args=null)
Definition: backup.inc:81
BackupDumper\progress
progress( $string)
Definition: backup.inc:417
wfGetLBFactory
wfGetLBFactory()
Get the load balancer factory object.
Definition: GlobalFunctions.php:2885
DumpFilter
Definition: DumpFilter.php:31
DumpFilter\$sink
DumpOutput $sink
FIXME will need to be made protected whenever legacy code is updated.
Definition: DumpFilter.php:37
$args
if( $line===false) $args
Definition: cdb.php:63
BackupDumper
Definition: backup.inc:36
BackupDumper\execute
execute()
Do the actual work.
Definition: backup.inc:148
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:250
$dbr
if(! $regexes) $dbr
Definition: cleanup.php:94
WikiExporter\LOGS
const LOGS
Definition: WikiExporter.php:52
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1965
BackupDumper\$server
$server
Definition: backup.inc:52
BackupDumper\reportPage
reportPage()
Definition: backup.inc:363
$rev
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1750
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
BackupDumper\processOptions
processOptions()
Processes arguments and sets $this->$sink accordingly.
Definition: backup.inc:155
BackupDumper\$lastTime
$lastTime
Definition: backup.inc:54
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:392
BackupDumper\dump
dump( $history, $text=WikiExporter::TEXT)
Definition: backup.inc:248
BackupDumper\$filterTypes
$filterTypes
Definition: backup.inc:59
ExportProgressFilter
Definition: backup.inc:428
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular param exists.
Definition: Maintenance.php:236
server
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of and they certainly aren t ideal for someone who s installing MediaWiki as MediaWiki does not conform to normal Unix filesystem layout Hopefully we ll offer direct support for standard layouts in the but for now *any change to the location of files is unsupported *Moving things and leaving symlinks will *probably *not break but it is *strongly *advised not to try any more intrusive changes to get MediaWiki to conform more closely to your filesystem hierarchy Any such attempt will almost certainly result in unnecessary bugs The standard recommended location to install relative to the web is it should be possible to enable the appropriate rewrite rules by if you can reconfigure the web server
Definition: distributors.txt:53
BackupDumper\$revStartId
$revStartId
Definition: backup.inc:43
BackupDumper\$pageCount
$pageCount
Definition: backup.inc:50
$type
$type
Definition: testCompression.php:48
BackupDumper\registerFilter
registerFilter( $name, $class)
Definition: backup.inc:129