MediaWiki REL1_32
BackupDumper.php
Go to the documentation of this file.
1<?php
28require_once __DIR__ . '/../Maintenance.php';
29
33
/**
 * Shared base for the XML backup dump maintenance scripts.
 *
 * Builds a chain of output sinks and filters from the command-line options,
 * drives a WikiExporter over the requested pages, revisions or log items and
 * writes periodic progress reports to stderr.
 */
abstract class BackupDumper extends Maintenance {
	/** @var bool Whether progress lines are written to stderr */
	public $reporting = true;
	/** @var mixed Passed to WikiExporter::pagesByName(); null means all pages */
	public $pages = null; // all pages
	/** @var bool */
	public $skipHeader = false; // don't output <mediawiki> and <siteinfo>
	/** @var bool */
	public $skipFooter = false; // don't output </mediawiki>
	/** @var int Lower bound handed to pagesByRange()/logsByRange(); 0 = unset */
	public $startId = 0;
	/** @var int Upper bound handed to pagesByRange()/logsByRange(); 0 = unset */
	public $endId = 0;
	/** @var int Lower bound handed to revsByRange(); 0 = unset */
	public $revStartId = 0;
	/** @var int Upper bound handed to revsByRange(); 0 = unset */
	public $revEndId = 0;
	/** @var bool Copied onto the exporter's dumpUploads flag */
	public $dumpUploads = false;
	/** @var bool Copied onto the exporter's dumpUploadFileContents flag */
	public $dumpUploadFileContents = false;
	/** @var bool Third argument of WikiExporter::pagesByRange() */
	public $orderRevs = false;

	/** @var int A report is shown whenever revCount is a multiple of this */
	protected $reportingInterval = 100;
	/** @var int Pages seen so far (see reportPage()) */
	protected $pageCount = 0;
	/** @var int Revisions seen so far (see revCount()) */
	protected $revCount = 0;
	/** @var string|null Value of --server; null falls back to $wgDBserver */
	protected $server = null; // use default
	/** @var object|null Head of the output/filter chain built by processOptions() */
	protected $sink = null; // Output filters
	/** @var float|int microtime() of the previous report */
	protected $lastTime = 0;
	/** @var int Value of pageCount at the previous report */
	protected $pageCountLast = 0;
	/** @var int Value of revCount at the previous report */
	protected $revCountLast = 0;

	/** @var string[] Map of output type name => sink class name */
	protected $outputTypes = [];
	/** @var string[] Map of filter type name => filter class name */
	protected $filterTypes = [];

	/** @var int Process id, shown in progress lines */
	protected $ID = 0;

	/**
	 * The dependency-injected database to use.
	 *
	 * @var IDatabase|null
	 * @see self::setDB
	 */
	protected $forcedDb = null;

	/** @var LoadBalancer Created by backupDb(), closed in __destruct() */
	protected $lb;

	// @todo Unused?
	private $stubText = false; // include rev_text_id instead of text; for 2-pass dump

	// The properties below used to be created dynamically on first assignment.
	// They are declared public so that their effective visibility is unchanged.

	/** @var resource Stream opened on php://stderr for progress output */
	public $stderr;
	/** @var mixed Result of the MAX(id) query in initProgress() */
	public $maxCount;
	/** @var float microtime() recorded by initProgress() */
	public $startTime;
	/** @var int Pages processed since the previous report */
	public $pageCountPart;
	/** @var int Revisions processed since the previous report */
	public $revCountPart;

	/**
	 * Registers the built-in outputs, filters and command-line options.
	 *
	 * @param array|null $args Command-line arguments. When given they are
	 *  loaded and processed right away so that dump() can be called directly
	 *  instead of execute().
	 */
	public function __construct( $args = null ) {
		parent::__construct();
		$this->stderr = fopen( "php://stderr", "wt" );

		// Built-in output and filter plugins
		$this->registerOutput( 'file', DumpFileOutput::class );
		$this->registerOutput( 'gzip', DumpGZipOutput::class );
		$this->registerOutput( 'bzip2', DumpBZip2Output::class );
		$this->registerOutput( 'dbzip2', DumpDBZip2Output::class );
		$this->registerOutput( '7zip', Dump7ZipOutput::class );

		$this->registerFilter( 'latest', DumpLatestFilter::class );
		$this->registerFilter( 'notalk', DumpNotalkFilter::class );
		$this->registerFilter( 'namespace', DumpNamespaceFilter::class );

		// These three can be specified multiple times
		$this->addOption( 'plugin', 'Load a dump plugin class. Specify as <class>[:<file>].',
			false, true, false, true );
		$this->addOption( 'output', 'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2', false, true, false, true );
		$this->addOption( 'filter', 'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace', false, true, false, true );
		$this->addOption( 'report', 'Report position and speed after every n pages processed. ' .
			'Default: 100.', false, true );
		$this->addOption( 'server', 'Force reading from MySQL server', false, true );
		$this->addOption( '7ziplevel', '7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.', false, true );

		if ( $args ) {
			// Args should be loaded and processed so that dump() can be called directly
			// instead of execute()
			$this->loadWithArgv( $args );
			$this->processOptions();
		}
	}

	/**
	 * Makes an output sink class available under a type name for --output.
	 *
	 * @param string $name Type name used on the command line
	 * @param string $class Name of the sink class to instantiate
	 */
	public function registerOutput( $name, $class ) {
		$this->outputTypes[$name] = $class;
	}

	/**
	 * Makes a filter class available under a type name for --filter.
	 *
	 * @param string $name Type name used on the command line
	 * @param string $class Name of the filter class to instantiate
	 */
	public function registerFilter( $name, $class ) {
		$this->filterTypes[$name] = $class;
	}

	/**
	 * Load a plugin and register it.
	 *
	 * @param string $class Plugin class; its static register() method is
	 *  called with this dumper as the only argument
	 * @param string $file File to include first; skipped when empty
	 */
	public function loadPlugin( $class, $file ) {
		if ( $file != '' ) {
			require_once $file;
		}
		$register = [ $class, 'register' ];
		$register( $this );
	}

	/**
	 * Placeholder; concrete dump scripts have to provide the real work.
	 *
	 * @throws MWException Always
	 */
	public function execute() {
		throw new MWException( 'execute() must be overridden in subclasses' );
	}

	/**
	 * Processes arguments and sets $this->sink accordingly.
	 *
	 * The options are walked in the order they were passed: every --output
	 * starts a new branch, every --filter wraps the branch built so far, and
	 * several branches are combined through a DumpMultiWriter.
	 */
	public function processOptions() {
		$sink = null;
		$sinks = [];

		// Each entry is read below as [ option name, option value ].
		$options = $this->orderedOptions;
		foreach ( $options as $arg ) {
			$opt = $arg[0];
			$param = $arg[1];

			switch ( $opt ) {
				case 'plugin':
					$val = explode( ':', $param );

					if ( count( $val ) === 1 ) {
						$this->loadPlugin( $val[0], '' );
					} elseif ( count( $val ) === 2 ) {
						$this->loadPlugin( $val[0], $val[1] );
					} else {
						$this->fatalError( 'Invalid plugin parameter' );
						return;
					}

					break;
				case 'output':
					// Limit of 2 keeps any further colons inside the file part.
					$split = explode( ':', $param, 2 );
					if ( count( $split ) !== 2 ) {
						$this->fatalError( 'Invalid output parameter' );
					}
					list( $type, $file ) = $split;
					if ( !is_null( $sink ) ) {
						// The previous branch is complete; keep it.
						$sinks[] = $sink;
					}
					if ( !isset( $this->outputTypes[$type] ) ) {
						$this->fatalError( "Unrecognized output sink type '$type'" );
					}
					$class = $this->outputTypes[$type];
					if ( $type === "7zip" ) {
						$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
					} else {
						$sink = new $class( $file );
					}

					break;
				case 'filter':
					if ( is_null( $sink ) ) {
						// A filter given before any --output wraps the default output.
						$sink = new DumpOutput();
					}

					$split = explode( ':', $param );
					$key = $split[0];

					if ( !isset( $this->filterTypes[$key] ) ) {
						$this->fatalError( "Unrecognized filter type '$key'" );
					}

					$type = $this->filterTypes[$key];

					if ( count( $split ) === 1 ) {
						$filter = new $type( $sink );
					} elseif ( count( $split ) === 2 ) {
						$filter = new $type( $sink, $split[1] );
					} else {
						$this->fatalError( 'Invalid filter parameter' );
					}

					// references are lame in php...
					unset( $sink );
					$sink = $filter;

					break;
			}
		}

		if ( $this->hasOption( 'report' ) ) {
			$this->reportingInterval = intval( $this->getOption( 'report' ) );
		}

		if ( $this->hasOption( 'server' ) ) {
			$this->server = $this->getOption( 'server' );
		}

		if ( is_null( $sink ) ) {
			$sink = new DumpOutput();
		}
		$sinks[] = $sink;

		if ( count( $sinks ) > 1 ) {
			$this->sink = new DumpMultiWriter( $sinks );
		} else {
			$this->sink = $sink;
		}
	}

	/**
	 * Runs the export and writes it to the configured sink.
	 *
	 * @param int $history WikiExporter history selector; it is tested against
	 *  the WikiExporter::LOGS bit here and handed on to the exporter
	 * @param int $text Text mode handed on to the WikiExporter constructor
	 */
	public function dump( $history, $text = WikiExporter::TEXT ) {
		# Notice messages will foul up your XML output even if they're
		# relatively harmless.
		if ( ini_get( 'display_errors' ) ) {
			ini_set( 'display_errors', 'stderr' );
		}

		$this->initProgress( $history );

		$db = $this->backupDb();
		$exporter = new WikiExporter( $db, $history, $text );
		$exporter->dumpUploads = $this->dumpUploads;
		$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

		// The wrapper receives this dumper alongside the real sink.
		$wrapper = new ExportProgressFilter( $this->sink, $this );
		$exporter->setOutputSink( $wrapper );

		if ( !$this->skipHeader ) {
			$exporter->openStream();
		}
		# Log item dumps: all or by range
		if ( $history & WikiExporter::LOGS ) {
			if ( $this->startId || $this->endId ) {
				$exporter->logsByRange( $this->startId, $this->endId );
			} else {
				$exporter->allLogs();
			}
		} elseif ( is_null( $this->pages ) ) {
			# Page dumps: all or by page ID range
			if ( $this->startId || $this->endId ) {
				$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
			} elseif ( $this->revStartId || $this->revEndId ) {
				$exporter->revsByRange( $this->revStartId, $this->revEndId );
			} else {
				$exporter->allPages();
			}
		} else {
			# Dump of specific pages
			$exporter->pagesByName( $this->pages );
		}

		if ( !$this->skipFooter ) {
			$exporter->closeStream();
		}

		$this->report( true );
	}

	/**
	 * Initialise starting time and maximum revision count.
	 *
	 * The maximum is MAX(page_id) of the page table for a current-only dump
	 * and MAX(rev_id) of the revision table otherwise; showReport() uses it
	 * to estimate the time of completion.
	 *
	 * @param int $history WikiExporter::CURRENT or WikiExporter::FULL
	 */
	public function initProgress( $history = WikiExporter::FULL ) {
		$table = ( $history == WikiExporter::CURRENT ) ? 'page' : 'revision';
		$field = ( $history == WikiExporter::CURRENT ) ? 'page_id' : 'rev_id';

		// Prefer the injected connection; otherwise use a replica connection.
		$dbr = $this->forcedDb;
		if ( $this->forcedDb === null ) {
			$dbr = wfGetDB( DB_REPLICA );
		}
		$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );
		$this->startTime = microtime( true );
		$this->lastTime = $this->startTime;
		$this->ID = getmypid();
	}

	/**
	 * Returns the connection the exporter should read from.
	 *
	 * The injected connection wins when one was set through setDB().
	 * Otherwise a new main load balancer is created and a DB_REPLICA
	 * connection is requested from it with 'dump' as the second argument
	 * (presumably the query group - confirm against LoadBalancer).
	 *
	 * @return IDatabase
	 */
	public function backupDb() {
		if ( $this->forcedDb !== null ) {
			return $this->forcedDb;
		}

		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		$this->lb = $lbFactory->newMainLB();
		$db = $this->lb->getConnection( DB_REPLICA, 'dump' );

		// Discourage the server from disconnecting us if it takes a long time
		// to read out the big ol' batch query.
		$db->setSessionOptions( [ 'connTimeout' => 3600 * 24 ] );

		return $db;
	}

	/**
	 * Force the dump to use the provided database connection for database
	 * operations, wherever possible.
	 *
	 * @param IDatabase|null $db The database connection to use; null clears
	 *  the forced connection again
	 */
	public function setDB( IDatabase $db = null ) {
		parent::setDB( $db );
		$this->forcedDb = $db;
	}

	/**
	 * Closes all connections of the load balancer created by backupDb().
	 */
	public function __destruct() {
		if ( isset( $this->lb ) ) {
			$this->lb->closeAll();
		}
	}

	/**
	 * @return string The server given with --server, or $wgDBserver when
	 *  none was given
	 */
	public function backupServer() {
		global $wgDBserver;

		return $this->server ?: $wgDBserver;
	}

	/**
	 * Counts one more page.
	 */
	public function reportPage() {
		$this->pageCount++;
	}

	/**
	 * Counts one more revision and shows a progress report when one is due.
	 */
	public function revCount() {
		$this->revCount++;
		$this->report();
	}

	/**
	 * Shows a progress report when one is due.
	 *
	 * A periodic call reports when revCount is a multiple of the reporting
	 * interval. The final call reports only when the periodic call did not
	 * already cover the current count, which is what the xor expresses.
	 *
	 * @param bool $final Whether this is the closing report of a dump
	 */
	public function report( $final = false ) {
		// An interval of 0 (--report=0 or a non-numeric value) would make the
		// modulo fail; treat it as "no periodic reports".
		$interval = (int)$this->reportingInterval;
		$atInterval = $interval !== 0 && $this->revCount % $interval == 0;

		if ( $final xor $atInterval ) {
			$this->showReport();
		}
	}

	/**
	 * Writes one progress line: page and revision counts, overall and
	 * per-interval rates, and the estimated time of completion.
	 */
	public function showReport() {
		if ( !$this->reporting ) {
			return;
		}

		$now = wfTimestamp( TS_DB );
		$nowts = microtime( true );
		$deltaAll = $nowts - $this->startTime;
		$deltaPart = $nowts - $this->lastTime;
		$this->pageCountPart = $this->pageCount - $this->pageCountLast;
		$this->revCountPart = $this->revCount - $this->revCountLast;

		$pageRate = '-';
		$revRate = '-';
		$etats = '-';
		if ( $deltaAll ) {
			$pageRate = $this->pageCount / $deltaAll;
			$revRate = $this->revCount / $deltaAll;

			// The ETA needs a non-zero share of the work to be done already;
			// without revisions or without a known maximum it would divide
			// by zero.
			if ( $this->revCount > 0 && $this->maxCount > 0 ) {
				$portion = $this->revCount / $this->maxCount;
				$eta = $this->startTime + $deltaAll / $portion;
				$etats = wfTimestamp( TS_DB, intval( $eta ) );
			}
		}

		if ( $deltaPart ) {
			$pageRatePart = $this->pageCountPart / $deltaPart;
			$revRatePart = $this->revCountPart / $deltaPart;
		} else {
			$pageRatePart = '-';
			$revRatePart = '-';
		}

		$this->progress( sprintf(
			"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
				. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
			$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
			$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
			$this->maxCount
		) );

		// Remember where this report stood so that the next one can compute
		// the "curr" rates for pages as well as for revisions.
		$this->lastTime = $nowts;
		$this->pageCountLast = $this->pageCount;
		$this->revCountLast = $this->revCount;
	}

	/**
	 * Writes a line to stderr unless reporting is switched off.
	 *
	 * @param string $string Text to write; a newline is appended
	 */
	public function progress( $string ) {
		if ( $this->reporting ) {
			fwrite( $this->stderr, $string . "\n" );
		}
	}
}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
$wgDBserver
Database host name or IP address.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfWikiID()
Get an ASCII string identifying this wiki This is used as a prefix in memcached keys.
if( $line===false) $args
Definition cdb.php:64
dump( $history, $text=WikiExporter::TEXT)
registerFilter( $name, $class)
progress( $string)
LoadBalancer $lb
IDatabase null $forcedDb
The dependency-injected database to use.
__construct( $args=null)
execute()
Do the actual work.
initProgress( $history=WikiExporter::FULL)
Initialise starting time and maximum revision count.
processOptions()
Processes arguments and sets $this->sink accordingly.
registerOutput( $name, $class)
setDB(IDatabase $db=null)
Force the dump to use the provided database connection for database operations, wherever possible.
loadPlugin( $class, $file)
Load a plugin and register it.
report( $final=false)
MediaWiki exception.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
array $orderedOptions
Used to read the options in the order they were passed.
hasOption( $name)
Checks to see if a particular option exists.
loadWithArgv( $argv)
Load params and arguments from a given array of command-line arguments.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Database connection, tracking, load balancing, and transaction manager for a cluster.
The ContentHandler facility adds support for arbitrary content types on wiki pages
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of and they certainly aren t ideal for someone who s installing MediaWiki as MediaWiki does not conform to normal Unix filesystem layout Hopefully we ll offer direct support for standard layouts in the but for now *any change to the location of files is unsupported *Moving things and leaving symlinks will *probably *not break but it is *strongly *advised not to try any more intrusive changes to get MediaWiki to conform more closely to your filesystem hierarchy Any such attempt will almost certainly result in unnecessary bugs The standard recommended location to install relative to the web is it should be possible to enable the appropriate rewrite rules by if you can reconfigure the web server
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2050
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:302
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
setSessionOptions(array $options)
Override database's default behavior.
const DB_REPLICA
Definition defines.php:25