MediaWiki  1.27.2
backup.inc
Go to the documentation of this file.
1 <?php
27 require_once __DIR__ . '/Maintenance.php';
28 require_once __DIR__ . '/../includes/export/DumpFilter.php';
29 
/**
 * Base class for maintenance scripts that produce XML dumps.
 *
 * It collects the output sinks and filters requested on the command line,
 * drives a WikiExporter over the selected pages, revisions or log items,
 * and periodically writes progress reports to stderr. Subclasses must
 * override execute().
 */
class BackupDumper extends Maintenance {
	public $reporting = true; // write progress reports to stderr
	public $pages = null; // all pages
	public $skipHeader = false; // don't output <mediawiki> and <siteinfo>
	public $skipFooter = false; // don't output </mediawiki>
	public $startId = 0; // lower bound of the page/log ID range (0 = unset)
	public $endId = 0; // upper bound of the page/log ID range (0 = unset)
	public $revStartId = 0; // lower bound of the revision ID range (0 = unset)
	public $revEndId = 0; // upper bound of the revision ID range (0 = unset)
	public $dumpUploads = false; // forwarded to WikiExporter::$dumpUploads
	public $dumpUploadFileContents = false; // forwarded to WikiExporter::$dumpUploadFileContents

	protected $reportingInterval = 100; // report after every n revisions
	protected $pageCount = 0;
	protected $revCount = 0;
	protected $server = null; // use default
	protected $sink = null; // Output filters
	protected $lastTime = 0; // time of the previous progress report
	protected $pageCountLast = 0; // page count at the previous progress report
	protected $revCountLast = 0; // revision count at the previous progress report

	protected $outputTypes = []; // output type name => class name
	protected $filterTypes = []; // filter type name => class name

	protected $ID = 0; // process ID, shown in progress reports

	/**
	 * The dependency-injected database to use.
	 *
	 * @var DatabaseBase|null
	 * @see self::setDB
	 */
	protected $forcedDb = null;

	/** @var LoadBalancer */
	protected $lb;

	// @todo Unused?
	private $stubText = false; // include rev_text_id instead of text; for 2-pass dump

	/**
	 * Registers the built-in output and filter types and the command-line
	 * options shared by all dump scripts.
	 *
	 * @param array|null $args Command-line arguments. When given, they are
	 *  loaded and processed right away so that dump() can be called directly
	 *  instead of execute().
	 */
	function __construct( $args = null ) {
		parent::__construct();
		$this->stderr = fopen( "php://stderr", "wt" );

		// Built-in output and filter plugins
		$this->registerOutput( 'file', 'DumpFileOutput' );
		$this->registerOutput( 'gzip', 'DumpGZipOutput' );
		$this->registerOutput( 'bzip2', 'DumpBZip2Output' );
		$this->registerOutput( 'dbzip2', 'DumpDBZip2Output' );
		$this->registerOutput( '7zip', 'Dump7ZipOutput' );

		$this->registerFilter( 'latest', 'DumpLatestFilter' );
		$this->registerFilter( 'notalk', 'DumpNotalkFilter' );
		$this->registerFilter( 'namespace', 'DumpNamespaceFilter' );

		// These three can be specified multiple times
		$this->addOption( 'plugin', 'Load a dump plugin class. Specify as <class>[:<file>].',
			false, true, false, true );
		$this->addOption( 'output', 'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2', false, true, false, true );
		$this->addOption( 'filter', 'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace', false, true, false, true );
		$this->addOption( 'report', 'Report position and speed after every n pages processed. ' .
			'Default: 100.', false, true );
		$this->addOption( 'server', 'Force reading from MySQL server', false, true );
		$this->addOption( '7ziplevel', '7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.', false, true );

		if ( $args ) {
			// Args should be loaded and processed so that dump() can be called directly
			// instead of execute()
			$this->loadWithArgv( $args );
			$this->processOptions();
		}
	}

	/**
	 * Makes an output sink class available under the given --output type name.
	 *
	 * @param string $name Type name used on the command line
	 * @param string $class Name of the class to instantiate for that type
	 */
	function registerOutput( $name, $class ) {
		$this->outputTypes[$name] = $class;
	}

	/**
	 * Makes a filter class available under the given --filter type name.
	 *
	 * @param string $name Type name used on the command line
	 * @param string $class Name of the class to instantiate for that type
	 */
	function registerFilter( $name, $class ) {
		$this->filterTypes[$name] = $class;
	}

	/**
	 * Load a plugin and register it.
	 *
	 * The plugin class must provide a static register() method; it is called
	 * with this dumper as its only argument.
	 *
	 * @param string $class Name of the plugin class
	 * @param string $file Optional file to require before registering. The
	 *  default keeps the one-argument call made for "--plugin=<class>" valid.
	 */
	function loadPlugin( $class, $file = '' ) {
		if ( $file != '' ) {
			require_once $file;
		}
		$register = [ $class, 'register' ];
		call_user_func_array( $register, [ $this ] );
	}

	/**
	 * Placeholder for the script entry point.
	 *
	 * @throws MWException always; subclasses have to provide the real body
	 */
	function execute() {
		throw new MWException( 'execute() must be overridden in subclasses' );
	}

	/**
	 * Processes arguments and sets $this->sink accordingly.
	 *
	 * Options are walked in the order they were passed: each --output starts
	 * a new branch, and each --filter wraps the branch that is currently
	 * being built. Several branches are combined into a DumpMultiWriter.
	 */
	function processOptions() {
		$sink = null;
		$sinks = [];

		// The ordered list is needed because the relative position of
		// --output and --filter decides which filter applies to which output.
		$options = $this->orderedOptions;
		foreach ( $options as $arg ) {
			$opt = $arg[0];
			$param = $arg[1];

			switch ( $opt ) {
				case 'plugin':
					$val = explode( ':', $param );

					if ( count( $val ) === 1 ) {
						$this->loadPlugin( $val[0], '' );
					} elseif ( count( $val ) === 2 ) {
						$this->loadPlugin( $val[0], $val[1] );
					} else {
						$this->fatalError( 'Invalid plugin parameter' );
						return;
					}

					break;
				case 'output':
					// Only split on the first colon so the file part may contain colons
					$split = explode( ':', $param, 2 );
					if ( count( $split ) !== 2 ) {
						$this->fatalError( 'Invalid output parameter' );
					}
					list( $type, $file ) = $split;
					// A new output closes the branch built so far
					if ( !is_null( $sink ) ) {
						$sinks[] = $sink;
					}
					if ( !isset( $this->outputTypes[$type] ) ) {
						$this->fatalError( "Unrecognized output sink type '$type'" );
					}
					$class = $this->outputTypes[$type];
					if ( $type === "7zip" ) {
						$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
					} else {
						$sink = new $class( $file );
					}

					break;
				case 'filter':
					// A filter given before any output wraps the default output
					if ( is_null( $sink ) ) {
						$sink = new DumpOutput();
					}

					$split = explode( ':', $param );
					$key = $split[0];

					if ( !isset( $this->filterTypes[$key] ) ) {
						$this->fatalError( "Unrecognized filter type '$key'" );
					}

					$type = $this->filterTypes[$key];

					if ( count( $split ) === 1 ) {
						$filter = new $type( $sink );
					} elseif ( count( $split ) === 2 ) {
						$filter = new $type( $sink, $split[1] );
					} else {
						$this->fatalError( 'Invalid filter parameter' );
					}

					// references are lame in php...
					unset( $sink );
					$sink = $filter;

					break;
			}
		}

		if ( $this->hasOption( 'report' ) ) {
			$this->reportingInterval = intval( $this->getOption( 'report' ) );
		}

		if ( $this->hasOption( 'server' ) ) {
			$this->server = $this->getOption( 'server' );
		}

		if ( is_null( $sink ) ) {
			$sink = new DumpOutput();
		}
		$sinks[] = $sink;

		if ( count( $sinks ) > 1 ) {
			$this->sink = new DumpMultiWriter( $sinks );
		} else {
			$this->sink = $sink;
		}
	}

	/**
	 * Runs the export and writes it to the configured sink.
	 *
	 * What gets exported is chosen in this order: log items when $history has
	 * the WikiExporter::LOGS bit set, otherwise (with no explicit page list)
	 * a page ID range, a revision ID range or all pages, otherwise the pages
	 * named in $this->pages.
	 *
	 * @param int $history History selection passed to WikiExporter
	 * @param int $text Text mode passed to WikiExporter
	 */
	function dump( $history, $text = WikiExporter::TEXT ) {
		# Notice messages will foul up your XML output even if they're
		# relatively harmless.
		if ( ini_get( 'display_errors' ) ) {
			ini_set( 'display_errors', 'stderr' );
		}

		$this->initProgress( $history );

		$db = $this->backupDb();
		$exporter = new WikiExporter( $db, $history, WikiExporter::STREAM, $text );
		$exporter->dumpUploads = $this->dumpUploads;
		$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

		// Route all output through the progress filter so pages and
		// revisions are counted as they are written.
		$wrapper = new ExportProgressFilter( $this->sink, $this );
		$exporter->setOutputSink( $wrapper );

		if ( !$this->skipHeader ) {
			$exporter->openStream();
		}
		# Log item dumps: all or by range
		if ( $history & WikiExporter::LOGS ) {
			if ( $this->startId || $this->endId ) {
				$exporter->logsByRange( $this->startId, $this->endId );
			} else {
				$exporter->allLogs();
			}
		} elseif ( is_null( $this->pages ) ) {
			# Page dumps: all or by page ID range
			if ( $this->startId || $this->endId ) {
				$exporter->pagesByRange( $this->startId, $this->endId );
			} elseif ( $this->revStartId || $this->revEndId ) {
				$exporter->revsByRange( $this->revStartId, $this->revEndId );
			} else {
				$exporter->allPages();
			}
		} else {
			# Dump of specific pages
			$exporter->pagesByName( $this->pages );
		}

		if ( !$this->skipFooter ) {
			$exporter->closeStream();
		}

		$this->report( true );
	}

	/**
	 * Initialise starting time and maximum revision count.
	 *
	 * The maximum is MAX(page_id) for current-only dumps and MAX(rev_id)
	 * otherwise; it is used to estimate the completion time in showReport().
	 *
	 * @param int $history WikiExporter::CURRENT or WikiExporter::FULL
	 */
	function initProgress( $history = WikiExporter::FULL ) {
		$table = ( $history == WikiExporter::CURRENT ) ? 'page' : 'revision';
		$field = ( $history == WikiExporter::CURRENT ) ? 'page_id' : 'rev_id';

		// Prefer the injected connection; fall back to a regular replica
		// connection only when none was forced via setDB().
		$dbr = $this->forcedDb;
		if ( $dbr === null ) {
			$dbr = wfGetDB( DB_SLAVE );
		}
		$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );
		$this->startTime = microtime( true );
		$this->lastTime = $this->startTime;
		$this->ID = getmypid();
	}

	/**
	 * Returns the database connection the export should read from.
	 *
	 * A connection forced via setDB() wins. Otherwise a new main load
	 * balancer is created and a DB_SLAVE connection from the 'dump' group
	 * is opened on it.
	 *
	 * @return DatabaseBase
	 */
	function backupDb() {
		if ( $this->forcedDb !== null ) {
			return $this->forcedDb;
		}

		$this->lb = wfGetLBFactory()->newMainLB();
		$db = $this->lb->getConnection( DB_SLAVE, 'dump' );

		// Discourage the server from disconnecting us if it takes a long time
		// to read out the big ol' batch query.
		$db->setSessionOptions( [ 'connTimeout' => 3600 * 24 ] );

		return $db;
	}

	/**
	 * Force the dump to use the provided database connection for database
	 * operations, wherever possible.
	 *
	 * @param IDatabase|null $db The database connection to use, or null to
	 *  go back to the default connections
	 */
	function setDB( IDatabase $db = null ) {
		parent::setDB( $db );
		$this->forcedDb = $db;
	}

	/**
	 * Closes the connections of the load balancer created by backupDb(), if any.
	 */
	function __destruct() {
		if ( isset( $this->lb ) ) {
			$this->lb->closeAll();
		}
	}

	/**
	 * Returns the server given with --server, or the configured default.
	 *
	 * @return string
	 */
	function backupServer() {
		// Without this declaration $wgDBserver would be an undefined local
		global $wgDBserver;

		return $this->server
			? $this->server
			: $wgDBserver;
	}

	/**
	 * Counts one finished page.
	 */
	function reportPage() {
		$this->pageCount++;
	}

	/**
	 * Counts one written revision and emits a progress report when due.
	 */
	function revCount() {
		$this->revCount++;
		$this->report();
	}

	/**
	 * Shows a progress report when one is due.
	 *
	 * A periodic call reports whenever the revision count is a multiple of
	 * the reporting interval. The final call reports only when the last
	 * revision did not already trigger a periodic report, hence the xor.
	 *
	 * @param bool $final Whether this is the closing report of the dump
	 */
	function report( $final = false ) {
		// An interval of 0 (e.g. "--report 0" or a non-numeric value) would
		// make the modulo fail; treat it as "no periodic reports".
		$onInterval = $this->reportingInterval != 0
			&& $this->revCount % $this->reportingInterval == 0;

		if ( $final xor $onInterval ) {
			$this->showReport();
		}
	}

	/**
	 * Writes one progress line with page and revision counts, overall and
	 * recent rates, and the estimated completion time.
	 */
	function showReport() {
		if ( $this->reporting ) {
			$now = wfTimestamp( TS_DB );
			$nowts = microtime( true );
			$deltaAll = $nowts - $this->startTime;
			$deltaPart = $nowts - $this->lastTime;
			$this->pageCountPart = $this->pageCount - $this->pageCountLast;
			$this->revCountPart = $this->revCount - $this->revCountLast;

			$etats = '-';
			if ( $deltaAll ) {
				$pageRate = $this->pageCount / $deltaAll;
				$revRate = $this->revCount / $deltaAll;
				// The estimate needs a non-zero share of finished work;
				// skip it instead of dividing by zero.
				if ( $this->revCount && $this->maxCount ) {
					$portion = $this->revCount / $this->maxCount;
					$eta = $this->startTime + $deltaAll / $portion;
					$etats = wfTimestamp( TS_DB, intval( $eta ) );
				}
			} else {
				$pageRate = '-';
				$revRate = '-';
			}
			if ( $deltaPart ) {
				$pageRatePart = $this->pageCountPart / $deltaPart;
				$revRatePart = $this->revCountPart / $deltaPart;
			} else {
				$pageRatePart = '-';
				$revRatePart = '-';
			}
			$this->progress( sprintf(
				"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
					. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
				$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
				$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
				$this->maxCount
			) );
			// Remember where this report ended so the next "curr" rates
			// cover only the interval since now.
			$this->lastTime = $nowts;
			$this->pageCountLast = $this->pageCount;
			$this->revCountLast = $this->revCount;
		}
	}

	/**
	 * Writes a line to stderr unless reporting is switched off.
	 *
	 * @param string $string Text to write; a newline is appended
	 */
	function progress( $string ) {
		if ( $this->reporting ) {
			fwrite( $this->stderr, $string . "\n" );
		}
	}

	/**
	 * Reports an error through Maintenance::error() with a die code of 1.
	 *
	 * @param string $msg Error message; a newline is appended
	 */
	function fatalError( $msg ) {
		$this->error( "$msg\n", 1 );
	}
}
423 
/**
 * Dump filter that counts progress.
 *
 * Page-close and revision writes are handed to the parent filter first;
 * afterwards the progress tracker is told about the finished page or the
 * written revision.
 */
class ExportProgressFilter extends DumpFilter {
	/**
	 * @param DumpOutput &$sink Sink that receives the output
	 * @param BackupDumper &$progress Object whose reportPage() and revCount()
	 *  are called as output is written
	 */
	function __construct( &$sink, &$progress ) {
		parent::__construct( $sink );
		$this->progress = $progress;
	}

	/**
	 * Writes the page footer, then counts the page as done.
	 *
	 * @param string $string Output passed through to the parent filter
	 */
	function writeClosePage( $string ) {
		parent::writeClosePage( $string );
		$this->progress->reportPage();
	}

	/**
	 * Writes a revision, then counts it.
	 *
	 * @param object $rev Revision data passed through to the parent filter
	 * @param string $string Output passed through to the parent filter
	 */
	function writeRevision( $rev, $string ) {
		parent::writeRevision( $rev, $string );
		$this->progress->revCount();
	}
}
setDB(IDatabase $db=null)
Force the dump to use the provided database connection for database operations, wherever possible...
Definition: backup.inc:340
processOptions()
Processes arguments and sets $this->sink accordingly.
Definition: backup.inc:151
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
LoadBalancer $lb
Definition: backup.inc:69
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2321
array $orderedOptions
Used to read the options in the order they were passed.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: maintenance.txt:39
$wgDBserver
Database host name or IP address.
hasOption($name)
Checks to see if a particular param exists.
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
DumpOutput $sink
FIXME will need to be made protected whenever legacy code is updated.
Definition: DumpFilter.php:37
report($final=false)
Definition: backup.inc:368
$dumpUploadFileContents
Definition: backup.inc:43
if($line===false) $args
Definition: cdb.php:64
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
addOption($name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
fatalError($msg)
Definition: backup.inc:419
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1004
const DB_SLAVE
Definition: Defines.php:46
__construct($args=null)
Definition: backup.inc:77
wfWikiID()
Get an ASCII string identifying this wiki This is used as a prefix in memcached keys.
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1584
backupServer()
Definition: backup.inc:351
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
const TS_DB
MySQL DATETIME (YYYY-MM-DD HH:MM:SS)
getOption($name, $default=null)
Get an option, or return the default.
loadWithArgv($argv)
Load params and arguments from a given array of command-line arguments.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
wfGetLBFactory()
Get the load balancer factory object.
The ContentHandler facility adds support for arbitrary content types on wiki pages
writeRevision($rev, $string)
Definition: backup.inc:435
registerFilter($name, $class)
Definition: backup.inc:125
error($err, $die=0)
Throw an error to the user.
initProgress($history=WikiExporter::FULL)
Initialise starting time and maximum revision count.
Definition: backup.inc:298
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of and they certainly aren t ideal for someone who s installing MediaWiki as MediaWiki does not conform to normal Unix filesystem layout Hopefully we ll offer direct support for standard layouts in the but for now *any change to the location of files is unsupported *Moving things and leaving symlinks will *probably *not break but it is *strongly *advised not to try any more intrusive changes to get MediaWiki to conform more closely to your filesystem hierarchy Any such attempt will almost certainly result in unnecessary bugs The standard recommended location to install relative to the web is it should be possible to enable the appropriate rewrite rules by if you can reconfigure the web server
$reportingInterval
Definition: backup.inc:45
__construct(&$sink, &$progress)
Definition: backup.inc:425
registerOutput($name, $class)
Definition: backup.inc:117
writeClosePage($string)
Definition: backup.inc:430
progress($string)
Definition: backup.inc:413
loadPlugin($class, $file)
Load a plugin and register it.
Definition: backup.inc:136
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2338
Basic database interface for live and lazy-loaded DB handles.
Definition: IDatabase.php:35
DatabaseBase null $forcedDb
The dependency-injected database to use.
Definition: backup.inc:66
dump($history, $text=WikiExporter::TEXT)
Definition: backup.inc:244
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:310