MediaWiki REL1_33
BackupDumper.php
Go to the documentation of this file.
1<?php
28require_once __DIR__ . '/../Maintenance.php';
29require_once __DIR__ . '/../../includes/export/WikiExporter.php';
30
34
39abstract class BackupDumper extends Maintenance {
/** @var bool Whether showReport() and progress() emit anything */
public $reporting = true;
/** @var mixed Pages handed to WikiExporter::pagesByName(); null means all pages */
public $pages = null;
/** @var bool Don't output <mediawiki> and <siteinfo> */
public $skipHeader = false;
/** @var bool Don't output </mediawiki> */
public $skipFooter = false;
/** @var int Start of the ID range passed to logsByRange()/pagesByRange(); 0 means unset */
public $startId = 0;
/** @var int End of the ID range passed to logsByRange()/pagesByRange(); 0 means unset */
public $endId = 0;
/** @var int Start of the range passed to revsByRange(); 0 means unset */
public $revStartId = 0;
/** @var int End of the range passed to revsByRange(); 0 means unset */
public $revEndId = 0;
/** @var bool Copied onto the exporter's dumpUploads flag in dump() */
public $dumpUploads = false;
/**
 * @var bool Copied onto the exporter's dumpUploadFileContents flag in dump().
 * Declared here so that dump() never reads an undefined property.
 */
public $dumpUploadFileContents = false;
/** @var bool Passed as the third argument of WikiExporter::pagesByRange() */
public $orderRevs = false;

/** @var int report() shows progress every time revCount is a multiple of this */
protected $reportingInterval = 100;
/** @var int Incremented by reportPage() */
protected $pageCount = 0;
/** @var int Incremented by revCount() */
protected $revCount = 0;
/** @var string|null Schema version handed to the exporter; null means use the default */
protected $schemaVersion = null;
/** @var string|null Value of the --server option; null means use the default */
protected $server = null;
/** @var object|null Output sink (possibly wrapped in filters) built by processOptions() */
protected $sink = null;
/** @var float|int microtime() of the previous progress report */
protected $lastTime = 0;
/** @var int Page count at the previous progress report */
protected $pageCountLast = 0;
/** @var int Revision count at the previous progress report */
protected $revCountLast = 0;

/** @var string[] Output sink classes, keyed by the <type> accepted by --output */
protected $outputTypes = [];
/** @var string[] Filter classes, keyed by the <type> accepted by --filter */
protected $filterTypes = [];

/** @var int Process ID, set by initProgress() and shown in progress reports */
protected $ID = 0;

/**
 * The dependency-injected database to use.
 *
 * @var IDatabase|null
 *
 * @see self::setDB
 */
protected $forcedDb = null;

/** @var LoadBalancer */
protected $lb;
78
/**
 * Set up the stderr handle, the built-in output/filter plugins and the
 * command line options shared by all dump scripts.
 *
 * @param array|null $args Command line arguments; when given they are loaded
 *  and processed immediately so that dump() can be called directly instead
 *  of execute().
 */
function __construct( $args = null ) {
	parent::__construct();
	$this->stderr = fopen( "php://stderr", "wt" );

	// Built-in output sinks, keyed by the <type> used with --output
	$builtinOutputs = [
		'file' => DumpFileOutput::class,
		'gzip' => DumpGZipOutput::class,
		'bzip2' => DumpBZip2Output::class,
		'dbzip2' => DumpDBZip2Output::class,
		'lbzip2' => DumpLBZip2Output::class,
		'7zip' => Dump7ZipOutput::class,
	];
	foreach ( $builtinOutputs as $typeName => $sinkClass ) {
		$this->registerOutput( $typeName, $sinkClass );
	}

	// Built-in filters, keyed by the <type> used with --filter
	$builtinFilters = [
		'latest' => DumpLatestFilter::class,
		'notalk' => DumpNotalkFilter::class,
		'namespace' => DumpNamespaceFilter::class,
	];
	foreach ( $builtinFilters as $typeName => $filterClass ) {
		$this->registerFilter( $typeName, $filterClass );
	}

	// plugin, output and filter may each be given several times
	$this->addOption(
		'plugin',
		'Load a dump plugin class. Specify as <class>[:<file>].',
		false, true, false, true
	);
	$this->addOption(
		'output',
		'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2, lbzip2',
		false, true, false, true
	);
	$this->addOption(
		'filter',
		'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace',
		false, true, false, true
	);

	// Single-occurrence options that take a value
	$this->addOption(
		'report',
		'Report position and speed after every n pages processed. ' .
			'Default: 100.',
		false, true
	);
	$this->addOption(
		'schema-version',
		'Schema version to use for output. ' .
			'Default: ' . WikiExporter::schemaVersion(),
		false, true
	);
	$this->addOption( 'server', 'Force reading from MySQL server', false, true );
	$this->addOption(
		'7ziplevel',
		'7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.',
		false, true
	);

	if ( !$args ) {
		return;
	}

	// Args should be loaded and processed so that dump() can be called directly
	// instead of execute()
	$this->loadWithArgv( $args );
	$this->processOptions();
}
120
/**
 * Make an output sink class available under the given --output type name.
 * An existing registration with the same name is replaced.
 *
 * @param string $name Type name, as used in --output=<type>:<file>
 * @param string $class Name of the class instantiated for that type
 */
function registerOutput( $name, $class ) {
	$this->outputTypes[$name] = $class;
}
128
/**
 * Make a filter class available under the given --filter type name.
 * An existing registration with the same name is replaced.
 *
 * @param string $name Type name, as used in --filter=<type>[:<options>]
 * @param string $class Name of the class instantiated for that type
 */
function registerFilter( $name, $class ) {
	$this->filterTypes[$name] = $class;
}
136
/**
 * Load a plugin and register it.
 *
 * The plugin's static register() method is called with this dumper as its
 * only argument.
 *
 * @param string $class Name of the plugin class
 * @param string $file File to include first; nothing is included when empty
 */
function loadPlugin( $class, $file ) {
	$needsInclude = ( $file != '' );
	if ( $needsInclude ) {
		require_once $file;
	}

	( [ $class, 'register' ] )( $this );
}
151
/**
 * Placeholder entry point: this base class does no work itself.
 *
 * @throws MWException Always, unless a subclass overrides this method
 */
function execute() {
	throw new MWException( 'execute() must be overridden in subclasses' );
}
155
/**
 * Processes arguments and sets $this->sink accordingly.
 *
 * The plugin/output/filter options are order-sensitive: every --output starts
 * a new branch and every --filter wraps the branch built so far, so they are
 * walked in the order they were passed rather than looked up by name.
 */
function processOptions() {
	$sink = null;
	$sinks = [];

	$this->schemaVersion = WikiExporter::schemaVersion();

	// orderedOptions holds [ name, value ] pairs in command line order
	$options = $this->orderedOptions;
	foreach ( $options as $arg ) {
		list( $opt, $param ) = $arg;

		switch ( $opt ) {
			case 'plugin':
				// <class>[:<file>]
				$val = explode( ':', $param, 2 );

				if ( count( $val ) === 1 ) {
					$this->loadPlugin( $val[0], '' );
				} elseif ( count( $val ) === 2 ) {
					$this->loadPlugin( $val[0], $val[1] );
				}

				break;
			case 'output':
				// <type>:<file>
				$split = explode( ':', $param, 2 );
				if ( count( $split ) !== 2 ) {
					$this->fatalError( 'Invalid output parameter' );
				}
				list( $type, $file ) = $split;
				// A new output closes the previous branch
				if ( !is_null( $sink ) ) {
					$sinks[] = $sink;
				}
				if ( !isset( $this->outputTypes[$type] ) ) {
					$this->fatalError( "Unrecognized output sink type '$type'" );
				}
				$class = $this->outputTypes[$type];
				if ( $type === "7zip" ) {
					// Only the 7zip sink takes the compression level
					$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
				} else {
					$sink = new $class( $file );
				}

				break;
			case 'filter':
				// <type>[:<options>]; a filter with no preceding --output
				// wraps a plain DumpOutput
				if ( is_null( $sink ) ) {
					$sink = new DumpOutput();
				}

				$split = explode( ':', $param, 2 );
				$key = $split[0];

				if ( !isset( $this->filterTypes[$key] ) ) {
					$this->fatalError( "Unrecognized filter type '$key'" );
				}

				$type = $this->filterTypes[$key];

				if ( count( $split ) === 1 ) {
					$filter = new $type( $sink );
				} elseif ( count( $split ) === 2 ) {
					$filter = new $type( $sink, $split[1] );
				}

				// references are lame in php...
				unset( $sink );
				$sink = $filter;

				break;
			case 'schema-version':
				if ( !in_array( $param, XmlDumpWriter::$supportedSchemas ) ) {
					$this->fatalError(
						"Unsupported schema version $param. Supported versions: " .
						implode( ', ', XmlDumpWriter::$supportedSchemas )
					);
				}
				$this->schemaVersion = $param;
				break;
		}
	}

	if ( $this->hasOption( 'report' ) ) {
		$this->reportingInterval = intval( $this->getOption( 'report' ) );
	}

	if ( $this->hasOption( 'server' ) ) {
		$this->server = $this->getOption( 'server' );
	}

	// Close the last branch, falling back to a plain DumpOutput when no
	// output or filter option was given at all
	if ( is_null( $sink ) ) {
		$sink = new DumpOutput();
	}
	$sinks[] = $sink;

	// Several branches are fanned out through a DumpMultiWriter
	if ( count( $sinks ) > 1 ) {
		$this->sink = new DumpMultiWriter( $sinks );
	} else {
		$this->sink = $sink;
	}
}
256
/**
 * Run the export: build a WikiExporter on the backup database, route its
 * output through the configured sink and emit the selected logs or pages.
 *
 * @param int $history Passed to WikiExporter; tested against
 *  WikiExporter::LOGS with a bitwise AND to pick a log dump
 * @param int $text Passed through to the WikiExporter constructor
 */
function dump( $history, $text = WikiExporter::TEXT ) {
	# Notice messages will foul up your XML output even if they're
	# relatively harmless.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $history );

	$exporter = new WikiExporter( $this->backupDb(), $history, $text );
	$exporter->setSchemaVersion( $this->schemaVersion );
	$exporter->dumpUploads = $this->dumpUploads;
	$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

	# The progress filter calls back into this object while passing data on
	$exporter->setOutputSink( new ExportProgressFilter( $this->sink, $this ) );

	if ( !$this->skipHeader ) {
		$exporter->openStream();
	}

	$hasIdRange = $this->startId || $this->endId;
	$wantsLogs = $history & WikiExporter::LOGS;

	if ( $wantsLogs && $hasIdRange ) {
		# Log items by range
		$exporter->logsByRange( $this->startId, $this->endId );
	} elseif ( $wantsLogs ) {
		# All log items
		$exporter->allLogs();
	} elseif ( $this->pages !== null ) {
		# Dump of specific pages
		$exporter->pagesByName( $this->pages );
	} elseif ( $hasIdRange ) {
		# Pages by page ID range
		$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
	} elseif ( $this->revStartId || $this->revEndId ) {
		# Revisions by range
		$exporter->revsByRange( $this->revStartId, $this->revEndId );
	} else {
		$exporter->allPages();
	}

	if ( !$this->skipFooter ) {
		$exporter->closeStream();
	}

	$this->report( true );
}
305
/**
 * Initialise starting time and maximum revision count.
 *
 * For a current-only dump the maximum page_id of the page table is used,
 * otherwise the maximum rev_id of the revision table.
 *
 * @param int $history One of the WikiExporter history constants
 */
function initProgress( $history = WikiExporter::FULL ) {
	$table = ( $history == WikiExporter::CURRENT ) ? 'page' : 'revision';
	$field = ( $history == WikiExporter::CURRENT ) ? 'page_id' : 'rev_id';

	// Prefer the injected connection; otherwise fall back to a replica
	$dbr = $this->forcedDb;
	if ( $this->forcedDb === null ) {
		$dbr = wfGetDB( DB_REPLICA );
	}
	$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );
	$this->startTime = microtime( true );
	$this->lastTime = $this->startTime;
	$this->ID = getmypid();
}
325
/**
 * Get the database connection the dump should read from.
 *
 * The connection set via setDB() is returned when there is one. Otherwise a
 * new main load balancer is created, kept in $this->lb, and a replica
 * connection from the 'dump' group is returned.
 *
 * @return IDatabase
 */
function backupDb() {
	if ( $this->forcedDb === null ) {
		$services = MediaWikiServices::getInstance();
		$this->lb = $services->getDBLoadBalancerFactory()->newMainLB();
		$conn = $this->lb->getConnection( DB_REPLICA, 'dump' );

		// Discourage the server from disconnecting us if it takes a long time
		// to read out the big ol' batch query.
		$oneDay = 3600 * 24;
		$conn->setSessionOptions( [ 'connTimeout' => $oneDay ] );

		return $conn;
	}

	return $this->forcedDb;
}
347
/**
 * Force the dump to use the provided database connection for database
 * operations, wherever possible.
 *
 * @param IDatabase|null $db Connection to use; null clears the override so
 *  initProgress() and backupDb() obtain their own connections again
 */
function setDB( IDatabase $db = null ) {
	parent::setDB( $db );
	$this->forcedDb = $db;
}
359
/**
 * Close every connection of the load balancer created by backupDb(), if
 * one was created.
 */
function __destruct() {
	if ( !isset( $this->lb ) ) {
		return;
	}

	$this->lb->closeAll();
}
365
/**
 * Get the database server to read from.
 *
 * @return string The --server value when one was given (and is not empty),
 *  otherwise $wgDBserver
 */
function backupServer() {
	global $wgDBserver;

	if ( $this->server ) {
		return $this->server;
	}

	return $wgDBserver;
}
371
/**
 * Count one more page. Unlike revCount(), this never triggers a report.
 */
function reportPage() {
	++$this->pageCount;
}
375
/**
 * Count one more revision, then let report() decide whether a progress
 * line is due.
 */
function revCount() {
	++$this->revCount;
	$this->report();
}
380
/**
 * Show a progress report when one is due.
 *
 * A periodic call reports whenever the revision count is a multiple of the
 * reporting interval. The final call reports only when the count is NOT
 * such a multiple, i.e. when the last periodic report did not already
 * cover it (hence the xor).
 *
 * @param bool $final Whether this is the closing report of a dump
 */
function report( $final = false ) {
	// An interval of 0 (e.g. --report=0 or a non-numeric value run through
	// intval()) would make the modulo throw; treat it as "no periodic
	// reports" so that only the final report is shown.
	$atInterval = $this->reportingInterval
		&& ( $this->revCount % $this->reportingInterval == 0 );

	if ( $final xor $atInterval ) {
		$this->showReport();
	}
}
386
/**
 * Write one progress line: page and revision totals, overall and
 * since-last-report rates, and an estimated completion time.
 *
 * Does nothing when reporting is disabled. Values that cannot be computed
 * (no time elapsed, or no basis for an estimate) are passed as '-'.
 */
function showReport() {
	if ( !$this->reporting ) {
		return;
	}

	$now = wfTimestamp( TS_DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	$pageRate = '-';
	$revRate = '-';
	$etats = '-';
	if ( $deltaAll ) {
		$pageRate = $this->pageCount / $deltaAll;
		$revRate = $this->revCount / $deltaAll;

		// The ETA extrapolates from the share of maxCount handled so far;
		// with no revisions counted yet or an empty table there is nothing
		// to extrapolate from, and dividing would fail.
		if ( $this->revCount && $this->maxCount ) {
			$portion = $this->revCount / $this->maxCount;
			$eta = $this->startTime + $deltaAll / $portion;
			$etats = wfTimestamp( TS_DB, intval( $eta ) );
		}
	}

	$pageRatePart = '-';
	$revRatePart = '-';
	if ( $deltaPart ) {
		$pageRatePart = $this->pageCountPart / $deltaPart;
		$revRatePart = $this->revCountPart / $deltaPart;
	}

	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
			. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
		$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
		$this->maxCount
	) );

	// Remember where this report left off so the next "curr" rates cover
	// only the interval in between. The page counter has to be carried
	// over as well, otherwise the current page rate is computed from the
	// running total.
	$this->lastTime = $nowts;
	$this->pageCountLast = $this->pageCount;
	$this->revCountLast = $this->revCount;
}
425
/**
 * Write a line to the stderr handle opened in the constructor, unless
 * reporting is disabled.
 *
 * @param string $string Text to write; a newline is appended
 */
function progress( $string ) {
	if ( !$this->reporting ) {
		return;
	}

	fwrite( $this->stderr, $string . "\n" );
}
431}
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
$wgDBserver
Database host name or IP address.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfWikiID()
Get an ASCII string identifying this wiki. This is used as a prefix in memcached keys.
if( $line===false) $args
Definition cdb.php:64
dump( $history, $text=WikiExporter::TEXT)
registerFilter( $name, $class)
progress( $string)
LoadBalancer $lb
IDatabase null $forcedDb
The dependency-injected database to use.
__construct( $args=null)
execute()
Do the actual work.
initProgress( $history=WikiExporter::FULL)
Initialise starting time and maximum revision count.
processOptions()
Processes arguments and sets $this->sink accordingly.
registerOutput( $name, $class)
setDB(IDatabase $db=null)
Force the dump to use the provided database connection for database operations, wherever possible.
loadPlugin( $class, $file)
Load a plugin and register it.
report( $final=false)
MediaWiki exception.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
array $orderedOptions
Used to read the options in the order they were passed.
hasOption( $name)
Checks to see if a particular option exists.
loadWithArgv( $argv)
Load params and arguments from a given array of command-line arguments.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Database connection, tracking, load balancing, and transaction manager for a cluster.
The ContentHandler facility adds support for arbitrary content types on wiki pages
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list
Definition deferred.txt:11
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:1999
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:271
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
setSessionOptions(array $options)
Override database's default behavior.
$filter
const DB_REPLICA
Definition defines.php:25
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition router.php:42