Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 183 |
|
0.00% |
0 / 15 |
CRAP | |
0.00% |
0 / 1 |
BackupDumper | |
0.00% |
0 / 182 |
|
0.00% |
0 / 15 |
2862 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
6 | |||
finalSetup | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
registerOutput | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
registerFilter | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
loadPlugin | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
processOptions | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
306 | |||
dump | |
0.00% |
0 / 32 |
|
0.00% |
0 / 1 |
156 | |||
initProgress | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
getBackupDatabase | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
setDB | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
reportPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
revCount | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
report | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
showReport | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
20 | |||
progress | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * Base classes for database-dumping maintenance scripts. |
4 | * |
5 | * Copyright © 2005 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | * @ingroup Dump |
25 | * @ingroup Maintenance |
26 | */ |
27 | |
28 | namespace MediaWiki\Maintenance; |
29 | |
30 | // @codeCoverageIgnoreStart |
31 | require_once __DIR__ . '/../Maintenance.php'; |
32 | require_once __DIR__ . '/../../includes/export/WikiExporter.php'; |
33 | // @codeCoverageIgnoreEnd |
34 | |
35 | use DumpMultiWriter; |
36 | use DumpOutput; |
37 | use ExportProgressFilter; |
38 | use MediaWiki\MainConfigNames; |
39 | use MediaWiki\Settings\SettingsBuilder; |
40 | use MediaWiki\WikiMap\WikiMap; |
41 | use WikiExporter; |
42 | use Wikimedia\Rdbms\IDatabase; |
43 | use Wikimedia\Rdbms\IMaintainableDatabase; |
44 | use XmlDumpWriter; |
45 | |
46 | /** |
47 | * @ingroup Dump |
48 | * @ingroup Maintenance |
49 | */ |
50 | abstract class BackupDumper extends Maintenance { |
/** @var bool Whether showReport() and progress() emit anything */
public $reporting = true;
/** @var string[]|null Pages handed to pagesByName() in dump(); null means all pages */
public $pages = null;
/** @var bool When true, dump() does not open the stream (no <mediawiki> and <siteinfo>) */
public $skipHeader = false;
/** @var bool When true, dump() does not close the stream (no </mediawiki>) */
public $skipFooter = false;
/** @var int Lower bound passed to pagesByRange()/logsByRange() in dump() */
public $startId = 0;
/** @var int Upper bound passed to pagesByRange()/logsByRange() in dump() */
public $endId = 0;
/** @var int Lower bound passed to revsByRange() in dump() */
public $revStartId = 0;
/** @var int Upper bound passed to revsByRange() in dump() */
public $revEndId = 0;
/** @var bool Copied onto the exporter's dumpUploads property */
public $dumpUploads = false;
/** @var bool Copied onto the exporter's dumpUploadFileContents property */
public $dumpUploadFileContents = false;
/** @var bool Forwarded to pagesByRange() */
public $orderRevs = false;
/** @var array|null Forwarded to the exporter factory as the namespace limit */
public $limitNamespaces = [];
/** @var resource|false Handle for php://stderr, opened in the constructor */
public $stderr;

/** @var int report() shows progress whenever $revCount is a multiple of this */
protected $reportingInterval = 100;
/** @var int Incremented by reportPage() */
protected $pageCount = 0;
/** @var int Incremented by revCount() */
protected $revCount = 0;
/** @var string|null Schema version given to the exporter; null means use default */
protected $schemaVersion = null;
/** @var DumpMultiWriter|DumpOutput|null Output filters, assembled by processOptions() */
protected $sink = null;
/** @var float microtime() of the previous report, used for the "curr" rates */
protected $lastTime = 0;
/** @var int Subtracted from $pageCount in showReport() to get $pageCountPart */
protected $pageCountLast = 0;
/** @var int Subtracted from $revCount in showReport() to get $revCountPart */
protected $revCountLast = 0;

/** @var string[] Output sink classes, keyed by the type name used with --output */
protected $outputTypes = [];
/** @var string[] Filter classes, keyed by the type name used with --filter */
protected $filterTypes = [];

/** @var int Set from getmypid() in initProgress(); printed in progress reports */
protected $ID = 0;

/** @var float microtime() taken in initProgress() */
protected $startTime;
/** @var int Pages counted since the value stored in $pageCountLast */
protected $pageCountPart;
/** @var int Revisions counted since the value stored in $revCountLast */
protected $revCountPart;
/** @var int MAX(page_id) or MAX(rev_id), fetched in initProgress() */
protected $maxCount;

// NOTE(review): the properties from $timeOfCheckpoint to $state are not
// referenced anywhere in this class; presumably they exist for subclasses.
/** @var float */
protected $timeOfCheckpoint;
/** @var ExportProgressFilter */
protected $egress;
/** @var string */
protected $buffer;
/** @var array|false */
protected $openElement;
/** @var bool */
protected $atStart;
/** @var string|null */
protected $thisRevModel;
/** @var string|null */
protected $thisRevFormat;
/** @var string */
protected $lastName;
/** @var string */
protected $state;

/**
 * The dependency-injected database to use. When set, it takes precedence
 * over the connections this class would otherwise obtain itself.
 *
 * @var IMaintainableDatabase|null
 *
 * @see self::setDB
 */
protected $forcedDb = null;
138 | |
/**
 * Opens the stderr handle, registers the built-in output sinks and filters
 * and declares the command line options shared by the dump scripts.
 *
 * @param array|null $args For backward compatibility: when non-empty, the
 *  arguments are loaded and processed right away so that dump() can be
 *  called directly instead of execute().
 */
public function __construct( $args = null ) {
	parent::__construct();
	$this->stderr = fopen( "php://stderr", "wt" );

	// Built-in output plugins, keyed by the <type> accepted by --output
	$builtinOutputs = [
		'file' => \DumpFileOutput::class,
		'gzip' => \DumpGZipOutput::class,
		'bzip2' => \DumpBZip2Output::class,
		'dbzip2' => \DumpDBZip2Output::class,
		'lbzip2' => \DumpLBZip2Output::class,
		'7zip' => \Dump7ZipOutput::class,
	];
	foreach ( $builtinOutputs as $outputName => $outputClass ) {
		$this->registerOutput( $outputName, $outputClass );
	}

	// Built-in filter plugins, keyed by the <type> accepted by --filter
	$builtinFilters = [
		'latest' => \DumpLatestFilter::class,
		'notalk' => \DumpNotalkFilter::class,
		'namespace' => \DumpNamespaceFilter::class,
	];
	foreach ( $builtinFilters as $filterName => $filterClass ) {
		$this->registerFilter( $filterName, $filterClass );
	}

	// --plugin, --output and --filter may each be given more than once
	$this->addOption( 'plugin', 'Load a dump plugin class. Specify as <class>[:<file>].',
		false, true, false, true );
	$this->addOption( 'output', 'Begin a filtered output stream; Specify as <type>:<file>. ' .
		'<type>s: file, gzip, bzip2, 7zip, dbzip2, lbzip2', false, true, 'o', true );
	$this->addOption( 'filter', 'Add a filter on an output branch. Specify as ' .
		'<type>[:<options>]. <types>s: latest, notalk, namespace', false, true, false, true );
	$this->addOption( 'report', 'Report position and speed after every n pages processed. ' .
		'Default: 100.', false, true );
	$this->addOption( '7ziplevel', '7zip compression level for all 7zip outputs. Used for ' .
		'-mx option to 7za command.', false, true );
	// NOTE: the default schema version is not known yet, because configuration
	// has not been loaded when this constructor runs. The option is therefore
	// declared again in finalSetup(), with the default in its description.
	$this->addOption( 'schema-version', 'Schema version to use for output.', false, true );

	if ( !$args ) {
		return;
	}

	// Load and process the arguments now, so that dump() can be called
	// directly instead of execute()
	$this->loadWithArgv( $args );
	$this->processOptions();
}
181 | |
/**
 * Declares --schema-version again once configuration is available, so that
 * the option description can name the configured default schema version.
 *
 * @param SettingsBuilder $settingsBuilder
 */
public function finalSetup( SettingsBuilder $settingsBuilder ) {
	parent::finalSetup( $settingsBuilder );

	$defaultSchema = $settingsBuilder->getConfig()->get( MainConfigNames::XmlDumpSchemaVersion );
	$description = 'Schema version to use for output. ' . 'Default: ' . $defaultSchema;

	$this->addOption( 'schema-version', $description, false, true );
}
190 | |
/**
 * Make an output sink class available under the given type name.
 * A later registration under the same name replaces the earlier one.
 *
 * @param string $name Type name, as used in "--output <type>:<file>"
 * @param string $class Name of output filter plugin class
 */
public function registerOutput( $name, $class ) {
	$this->outputTypes[$name] = $class;
}
198 | |
/**
 * Make a filter class available under the given type name.
 * A later registration under the same name replaces the earlier one.
 *
 * @param string $name Type name, as used in "--filter <type>[:<options>]"
 * @param string $class Name of filter plugin class
 */
public function registerFilter( $name, $class ) {
	$this->filterTypes[$name] = $class;
}
206 | |
/**
 * Load a plugin and let it register itself with this dumper.
 *
 * @param string $class Name of plugin class; must have a static 'register'
 *  method that takes a BackupDumper as a parameter.
 * @param string $file Full or relative path to the PHP file to load, or empty
 *  when the class needs no explicit include.
 */
public function loadPlugin( $class, $file ) {
	$needsInclude = ( $file != '' );
	if ( $needsInclude ) {
		require_once $file;
	}

	// Invoke <class>::register( $this )
	$registerCallback = [ $class, 'register' ];
	$registerCallback( $this );
}
221 | |
/**
 * Processes arguments and sets $this->sink accordingly.
 *
 * The options are walked in the order they were given on the command line:
 * each --output starts a new sink, and each --filter wraps the sink that is
 * current at that point. When several outputs were requested they are
 * combined into a DumpMultiWriter.
 *
 * Also sets $this->schemaVersion (from --schema-version, falling back to
 * WikiExporter::schemaVersion()) and $this->reportingInterval (from --report).
 */
protected function processOptions() {
	$sink = null;
	$sinks = [];

	$this->schemaVersion = WikiExporter::schemaVersion();

	$options = $this->orderedOptions;
	foreach ( $options as [ $opt, $param ] ) {
		switch ( $opt ) {
			case 'plugin':
				// <class>[:<file>]
				$val = explode( ':', $param, 2 );

				if ( count( $val ) === 1 ) {
					$this->loadPlugin( $val[0], '' );
				} elseif ( count( $val ) === 2 ) {
					$this->loadPlugin( $val[0], $val[1] );
				}

				break;
			case 'output':
				// <type>:<file>
				$split = explode( ':', $param, 2 );
				if ( count( $split ) !== 2 ) {
					$this->fatalError( 'Invalid output parameter' );
				}
				[ $type, $file ] = $split;
				// The previous output branch is complete; keep it and start a new one
				if ( $sink !== null ) {
					$sinks[] = $sink;
				}
				if ( !isset( $this->outputTypes[$type] ) ) {
					$this->fatalError( "Unrecognized output sink type '$type'" );
				}
				$class = $this->outputTypes[$type];
				if ( $type === "7zip" ) {
					$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
				} else {
					$sink = new $class( $file );
				}

				break;
			case 'filter':
				// <type>[:<options>]; a filter given before any --output
				// wraps a plain DumpOutput
				$sink ??= new DumpOutput();

				$split = explode( ':', $param, 2 );
				$key = $split[0];

				if ( !isset( $this->filterTypes[$key] ) ) {
					$this->fatalError( "Unrecognized filter type '$key'" );
				}

				$type = $this->filterTypes[$key];

				if ( count( $split ) === 2 ) {
					$filter = new $type( $sink, $split[1] );
				} else {
					$filter = new $type( $sink );
				}

				// references are lame in php...
				unset( $sink );
				$sink = $filter;

				break;
			case 'schema-version':
				// Strict comparison: a loose in_array() compares numeric strings
				// by value, so e.g. '0.1' would be accepted as matching '0.10'.
				// NOTE(review): assumes $supportedSchemas holds strings - confirm.
				if ( !in_array( $param, XmlDumpWriter::$supportedSchemas, true ) ) {
					$this->fatalError(
						"Unsupported schema version $param. Supported versions: " .
						implode( ', ', XmlDumpWriter::$supportedSchemas )
					);
				}
				$this->schemaVersion = $param;
				break;
		}
	}

	if ( $this->hasOption( 'report' ) ) {
		$this->reportingInterval = intval( $this->getOption( 'report' ) );
	}

	// With no --output and no --filter at all, fall back to a plain DumpOutput
	$sink ??= new DumpOutput();
	$sinks[] = $sink;

	if ( count( $sinks ) > 1 ) {
		$this->sink = new DumpMultiWriter( $sinks );
	} else {
		$this->sink = $sink;
	}
}
312 | |
/**
 * Run the export and write it to the configured sink.
 *
 * Depending on $history and on the range/page properties of this object,
 * this dumps logs (all or by ID range), or pages (all, by page ID range,
 * by revision ID range, or by name). A final progress report is issued
 * when the dump is complete.
 *
 * @param int $history Forwarded to initProgress() and to the exporter; when
 *  the WikiExporter::LOGS bit is set, logs are dumped instead of pages.
 * @param int $text Forwarded to the exporter; defaults to WikiExporter::TEXT
 */
public function dump( $history, $text = WikiExporter::TEXT ) {
	// Notice messages will foul up the XML output even if they are
	// relatively harmless, so send them to stderr instead.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $history );

	$exporter = $this->getServiceContainer()
		->getWikiExporterFactory()
		->getWikiExporter(
			$this->getBackupDatabase(),
			$history,
			$text,
			$this->limitNamespaces
		);
	$exporter->setSchemaVersion( $this->schemaVersion );
	$exporter->dumpUploads = $this->dumpUploads;
	$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

	// Route the output through a progress filter that reports back to $this
	$exporter->setOutputSink( new ExportProgressFilter( $this->sink, $this ) );

	if ( !$this->skipHeader ) {
		$exporter->openStream();
	}

	$wantsLogs = $history & WikiExporter::LOGS;
	$hasIdRange = $this->startId || $this->endId;
	$hasRevRange = $this->revStartId || $this->revEndId;

	if ( $wantsLogs && $hasIdRange ) {
		// Log items within an ID range
		$exporter->logsByRange( $this->startId, $this->endId );
	} elseif ( $wantsLogs ) {
		// All log items
		$exporter->allLogs();
	} elseif ( $this->pages !== null ) {
		// Dump of specific pages
		$exporter->pagesByName( $this->pages );
	} elseif ( $hasIdRange ) {
		// Pages within a page ID range
		$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
	} elseif ( $hasRevRange ) {
		// Revisions within a revision ID range
		$exporter->revsByRange( $this->revStartId, $this->revEndId );
	} else {
		$exporter->allPages();
	}

	if ( !$this->skipFooter ) {
		$exporter->closeStream();
	}

	$this->report( true );
}
366 | |
/**
 * Record the start time, the process ID and the highest page or revision ID.
 * The ETA shown in progress reports is extrapolated from these, assuming a
 * relatively constant per-revision rate.
 *
 * @param int $history WikiExporter::CURRENT or WikiExporter::FULL
 */
public function initProgress( $history = WikiExporter::FULL ) {
	// Current-only dumps are measured against page IDs, everything else
	// against revision IDs
	$currentOnly = ( $history == WikiExporter::CURRENT );
	$table = $currentOnly ? 'page' : 'revision';
	$field = $currentOnly ? 'page_id' : 'rev_id';

	// An injected connection wins over a replica from the 'dump' group
	$dbr = $this->forcedDb ?? $this->getDB( DB_REPLICA, [ 'dump' ] );

	$this->maxCount = $dbr->newSelectQueryBuilder()
		->select( "MAX($field)" )
		->from( $table )
		->caller( __METHOD__ )
		->fetchField();

	$this->startTime = microtime( true );
	$this->lastTime = $this->startTime;
	$this->ID = getmypid();
}
389 | |
/**
 * Get the connection the exporter should read from: the injected one if
 * setDB() was called, otherwise a replica connection from the 'dump' group.
 *
 * @return IDatabase
 */
protected function getBackupDatabase() {
	if ( $this->forcedDb === null ) {
		$loadBalancer = $this->getServiceContainer()
			->getDBLoadBalancerFactory()
			->getMainLB();
		$replica = $loadBalancer->getConnection( DB_REPLICA, 'dump' );

		// Discourage the server from disconnecting us if it takes a long
		// time to read out the big ol' batch query.
		$replica->setSessionOptions( [ 'connTimeout' => 24 * 3600 ] );

		return $replica;
	}

	return $this->forcedDb;
}
409 | |
/**
 * Inject a database connection. Besides handing it to the parent class, it
 * is remembered so that initProgress() and getBackupDatabase() use it
 * instead of fetching a connection of their own.
 *
 * @param IMaintainableDatabase $db The database connection to use
 */
public function setDB( IMaintainableDatabase $db ) {
	parent::setDB( $db );

	$this->forcedDb = $db;
}
420 | |
/**
 * Count one more page. Unlike revCount(), this never triggers a report.
 */
public function reportPage() {
	++$this->pageCount;
}
424 | |
/**
 * Count one more revision and give report() the chance to show progress.
 */
public function revCount() {
	++$this->revCount;
	$this->report();
}
429 | |
/**
 * Show a progress report if one is due.
 *
 * A periodic call ($final = false) reports when the revision count is a
 * multiple of the reporting interval. The final call reports only when the
 * revision count is NOT such a multiple, i.e. when the last periodic call
 * did not already report the same numbers.
 *
 * A reporting interval of zero (e.g. "--report=0", or a non-numeric value
 * passed through intval()) disables periodic reports; only the final report
 * is shown. Without this guard the modulo would throw DivisionByZeroError.
 *
 * @param bool $final Whether this is the report at the end of the dump
 */
public function report( $final = false ) {
	$interval = (int)$this->reportingInterval;
	$atInterval = $interval !== 0 && $this->revCount % $interval === 0;

	if ( $final xor $atInterval ) {
		$this->showReport();
	}
}
435 | |
/**
 * Write one progress line: page and revision counts, overall ("all") and
 * since-last-report ("curr") rates, and an ETA extrapolated from the
 * fraction of $maxCount processed so far.
 *
 * Does nothing when reporting is disabled. Values that cannot be computed
 * (no time elapsed, nothing processed yet, or no known maximum) are shown
 * as "-".
 */
public function showReport() {
	if ( !$this->reporting ) {
		return;
	}

	$now = wfTimestamp( TS_DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	// Rates are formatted up front and printed with %s, so that the "-"
	// placeholder survives; through %0.1f it would be printed as "0.0".
	$pageRate = '-';
	$revRate = '-';
	$etats = '-';
	if ( $deltaAll ) {
		$pageRate = sprintf( '%0.1f', $this->pageCount / $deltaAll );
		$revRate = sprintf( '%0.1f', $this->revCount / $deltaAll );

		// The ETA divides by both the maximum and the processed fraction;
		// skip it when either is zero/unknown instead of dividing by zero.
		if ( $this->revCount && $this->maxCount ) {
			$portion = $this->revCount / $this->maxCount;
			$eta = $this->startTime + $deltaAll / $portion;
			$etats = wfTimestamp( TS_DB, intval( $eta ) );
		}
	}

	$pageRatePart = '-';
	$revRatePart = '-';
	if ( $deltaPart ) {
		$pageRatePart = sprintf( '%0.1f', $this->pageCountPart / $deltaPart );
		$revRatePart = sprintf( '%0.1f', $this->revCountPart / $deltaPart );
	}

	$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%s|%s/sec all|curr), "
			. "%d revs (%s|%s/sec all|curr), ETA %s [max %d]",
		$now, $dbDomain, $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
		$this->maxCount
	) );

	// Remember where this report ended, so the next "curr" rates cover only
	// the work done since now. The page counter must be saved as well,
	// otherwise the "curr" page rate is computed from the total page count.
	$this->lastTime = $nowts;
	$this->pageCountLast = $this->pageCount;
	$this->revCountLast = $this->revCount;
}
476 | |
/**
 * Write a progress line to stderr, followed by a newline.
 *
 * Nothing is written when reporting is disabled, or when $this->stderr is
 * not an open stream (it is false if fopen() failed in the constructor, and
 * fwrite() throws a TypeError when handed a non-resource).
 *
 * @param string $string The line to write, without trailing newline
 */
protected function progress( $string ) {
	if ( $this->reporting && is_resource( $this->stderr ) ) {
		fwrite( $this->stderr, $string . "\n" );
	}
}
482 | } |
483 | |
// Keep the un-namespaced class name working for existing callers.
/** @deprecated class alias since 1.43 */
\class_alias( BackupDumper::class, 'BackupDumper' );