MediaWiki  master
importDump.php
Go to the documentation of this file.
1 <?php
28 
29 require_once __DIR__ . '/Maintenance.php';
30 
36 class BackupReader extends Maintenance {
/** @var int Page-count interval used by report(); set from the 'report' option in execute() */
38  public $reportingInterval = 100;
/** @var int Running page counter; incremented by reportPage(), pre-set by the 'skip-to' option */
40  public $pageCount = 0;
/** @var int Running counter bumped by handleRevision() and handleLogItem() */
42  public $revCount = 0;
/** @var bool True when 'dry-run' was given: items are counted but the stored callbacks are not invoked */
44  public $dryRun = false;
/** @var bool True when 'uploads' was given; gates handleUpload() */
46  public $uploads = false;
/** @var int Number of uploads seen by handleUpload() */
48  protected $uploadCount = 0;
/** @var string|false Value of the 'image-base-path' option, or false when it was not given */
50  public $imageBasePath = false;
/** @var array|false Namespace indexes to import, or false when no namespace filter is active */
52  public $nsFilter = false;
/** @var resource|false Handle on php://stderr opened in the constructor; written to by progress() */
54  public $stderr;
/** @var callable|null Value returned by the importer's setRevisionCallback(); called from handleRevision() */
56  protected $importCallback;
/** @var callable|null Value returned by the importer's setLogItemCallback(); called from handleLogItem() */
58  protected $logItemCallback;
/** @var callable|null Value returned by the importer's setUploadCallback(); stored but not invoked in this class */
60  protected $uploadCallback;
/** @var float microtime( true ) taken when importFromHandle() starts; basis for the rates in showReport() */
62  protected $startTime;
63 
/**
 * Declares the script's description, options and the optional 'file'
 * argument, and opens the stderr handle used for progress output.
 */
64  public function __construct() {
65  parent::__construct();
// Availability of each decompression stream wrapper is shown in the help text below.
66  $gz = in_array( 'compress.zlib', stream_get_wrappers() )
67  ? 'ok'
68  : '(disabled; requires PHP zlib module)';
69  $bz2 = in_array( 'compress.bzip2', stream_get_wrappers() )
70  ? 'ok'
71  : '(disabled; requires PHP bzip2 module)';
72 
73  $this->addDescription(
74  <<<TEXT
75 This script reads pages from an XML file as produced from Special:Export or
76 dumpBackup.php, and saves them into the current wiki.
77 
78 Compressed XML files may be read directly:
79  .gz $gz
80  .bz2 $bz2
81  .7z (if 7za executable is in PATH)
82 
83 Note that for very large data sets, importDump.php may be slow; there are
84 alternate methods which can be much faster for full site restoration:
85 <https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
86 TEXT
87  );
// progress() writes to this handle rather than to the normal output channel.
88  $this->stderr = fopen( "php://stderr", "wt" );
// In the addOption() calls below, a trailing "false, true" means: not required, takes a value.
89  $this->addOption( 'report',
90  'Report position and speed after every n pages processed', false, true );
91  $this->addOption( 'namespaces',
92  'Import only the pages from namespaces belonging to the list of ' .
93  'pipe-separated namespace names or namespace indexes', false, true );
94  $this->addOption( 'rootpage', 'Pages will be imported as subpages of the specified page',
95  false, true );
96  $this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
97  $this->addOption( 'debug', 'Output extra verbose debug information' );
98  $this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
99  $this->addOption(
100  'no-updates',
101  'Disable link table updates. Is faster but leaves the wiki in an inconsistent state'
102  );
103  $this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
104  $this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true );
105  $this->addOption( 'username-prefix', 'Prefix for interwiki usernames', false, true );
// NOTE(review): importFromHandle() only reads 'no-local-users' when 'username-prefix' is also given.
106  $this->addOption( 'no-local-users',
107  'Treat all usernames as interwiki. ' .
108  'The default is to assign edits to local users where they exist.',
109  false, false
110  );
// Optional positional argument; execute() falls back to stdin when it is absent.
111  $this->addArg( 'file', 'Dump file to import [else use stdin]', false );
112  }
113 
/**
 * Script entry point: applies the command-line options to this object,
 * then imports the dump from the file argument or, if none was given,
 * from standard input, and finally prints follow-up advice.
 */
public function execute() {
	$readOnlyMode = $this->getServiceContainer()->getReadOnlyMode();
	if ( $readOnlyMode->isReadOnly() ) {
		$this->fatalError( "Wiki is in read-only mode; you'll need to disable it for import to work." );
	}

	// report() takes pageCount modulo this value, so a zero (or
	// non-numeric) interval is replaced by the default of 100.
	$this->reportingInterval = intval( $this->getOption( 'report', 100 ) ) ?: 100;

	$this->dryRun = $this->hasOption( 'dry-run' );
	$this->uploads = $this->hasOption( 'uploads' );

	if ( $this->hasOption( 'image-base-path' ) ) {
		$this->imageBasePath = $this->getOption( 'image-base-path' );
	}
	if ( $this->hasOption( 'namespaces' ) ) {
		// The option is a pipe-separated list of namespace names or indexes.
		$requestedNamespaces = explode( '|', $this->getOption( 'namespaces' ) );
		$this->setNsfilter( $requestedNamespaces );
	}

	if ( !$this->hasArg( 0 ) ) {
		$this->importFromStdin();
	} else {
		$this->importFromFile( $this->getArg( 0 ) );
	}

	$this->output( "Done!\n" );
	$this->output( "You might want to run rebuildrecentchanges.php to regenerate RecentChanges,\n" );
	$this->output( "and initSiteStats.php to update page and revision counts\n" );
}
145 
/**
 * Stores the namespace filter used by skippedNamespace().
 *
 * @param array $namespaces Namespace names or indexes; each one is
 *  resolved through getNsIndex(). An empty array disables filtering.
 */
private function setNsfilter( array $namespaces ) {
	if ( $namespaces === [] ) {
		$this->nsFilter = false;

		return;
	}

	// Resolve every entry, keeping the input keys, then drop duplicates.
	$resolved = [];
	foreach ( $namespaces as $key => $name ) {
		$resolved[$key] = $this->getNsIndex( $name );
	}
	$this->nsFilter = array_unique( $resolved );
}
154 
/**
 * Resolves one entry of the 'namespaces' option to a namespace index.
 *
 * The content language is asked first to resolve the value as a
 * namespace name. Failing that, the value is accepted as a numeric index
 * if it is the exact string form of an integer for which the content
 * language has namespace text. Anything else ends the script through
 * fatalError().
 *
 * @param string $namespace Namespace name or index as typed by the user
 * @return int|false|null Index reported by the content language or the
 *  parsed integer
 */
private function getNsIndex( $namespace ) {
	$contentLanguage = $this->getServiceContainer()->getContentLanguage();

	$indexByName = $contentLanguage->getNsIndex( $namespace );
	if ( $indexByName !== false ) {
		return $indexByName;
	}

	$numericIndex = intval( $namespace );
	// Only accept input such as "4": "4abc" or "04" do not round-trip.
	$isExactInteger = strval( $numericIndex ) === $namespace;
	if ( $isExactInteger && $contentLanguage->getNsText( $numericIndex ) !== false ) {
		return $numericIndex;
	}

	$this->fatalError( "Unknown namespace text / index specified: $namespace" );
}
167 
/**
 * Tells whether an item must be skipped because of the namespace filter.
 *
 * @param object|null $title Object exposing getNamespace(), or null when
 *  the item has no title (probably a log entry)
 * @return bool True when a filter is active and the title's namespace is
 *  not part of it; false otherwise, including for a null title
 */
private function skippedNamespace( $title ) {
	// Nothing to check without a title or without an active filter.
	if ( $title === null || !is_array( $this->nsFilter ) ) {
		return false;
	}

	// Strict comparison: the filter holds integer namespace indexes, so
	// loose string/number juggling must not be able to produce a match.
	return !in_array( $title->getNamespace(), $this->nsFilter, true );
}
182 
/**
 * Page callback registered with the importer in importFromHandle();
 * counts one more page.
 *
 * @param mixed $page Unused
 */
public function reportPage( $page ) {
	++$this->pageCount;
}
186 
/**
 * Revision callback registered with the importer in importFromHandle().
 *
 * Revisions without a title are reported on stderr and dropped; revisions
 * outside the namespace filter are dropped silently. Everything else is
 * counted and, unless this is a dry run, handed to the callback that was
 * returned when this handler was registered.
 *
 * @param WikiRevision $rev
 */
public function handleRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();
	if ( !$title ) {
		$this->progress( "Got bogus revision with null title!" );

		return;
	}

	if ( !$this->skippedNamespace( $title ) ) {
		++$this->revCount;
		$this->report();

		if ( $this->dryRun ) {
			return;
		}
		call_user_func( $this->importCallback, $rev );
	}
}
209 
/**
 * Upload callback registered with the importer in importFromHandle().
 *
 * The callback stored in $uploadCallback is not invoked here; the
 * revision is passed straight to the upload importer service instead.
 *
 * @param WikiRevision $revision
 * @return bool True only when the upload was actually imported and the
 *  resulting status is good; false when uploads are disabled, the
 *  namespace is filtered out, this is a dry run, or the import failed
 */
public function handleUpload( WikiRevision $revision ) {
	if ( !$this->uploads || $this->skippedNamespace( $revision->getTitle() ) ) {
		return false;
	}

	++$this->uploadCount;
	$this->progress( "upload: " . $revision->getFilename() );

	if ( $this->dryRun ) {
		return false;
	}

	$uploadImporter = $this->getServiceContainer()->getWikiRevisionUploadImporter();

	return $uploadImporter->import( $revision )->isGood();
}
235 
/**
 * Log item callback registered with the importer in importFromHandle().
 *
 * Items outside the namespace filter are ignored. Other items are added
 * to the revision counter and, unless this is a dry run, handed to the
 * callback that was returned when this handler was registered.
 *
 * @param WikiRevision $rev
 */
public function handleLogItem( WikiRevision $rev ) {
	if ( !$this->skippedNamespace( $rev->getTitle() ) ) {
		++$this->revCount;
		$this->report();

		if ( $this->dryRun ) {
			return;
		}
		call_user_func( $this->logItemCallback, $rev );
	}
}
250 
/**
 * Shows a progress report when one is due.
 *
 * A report is due when exactly one of these holds: $final is truthy, or
 * the page count is a multiple of the reporting interval (a page count
 * of zero counts as a multiple).
 *
 * @param bool $final
 */
private function report( $final = false ) {
	$onIntervalBoundary = ( $this->pageCount % $this->reportingInterval ) == 0;

	if ( (bool)$final !== $onIntervalBoundary ) {
		$this->showReport();
	}
}
256 
/**
 * Writes the current position and speed to stderr (unless the script is
 * in quiet mode), then waits for database replication.
 */
private function showReport() {
	if ( !$this->mQuiet ) {
		$elapsed = microtime( true ) - $this->startTime;

		// Rates stay as placeholders when no time has elapsed yet.
		$pageRate = '-';
		$revRate = '-';
		if ( $elapsed ) {
			$pageRate = sprintf( "%.2f", $this->pageCount / $elapsed );
			$revRate = sprintf( "%.2f", $this->revCount / $elapsed );
		}

		// Dumps that only contain log items have no page tally, so the
		// revision counter is shown instead.
		$summary = $this->pageCount
			? "$this->pageCount ($pageRate pages/sec $revRate revs/sec)"
			: "$this->revCount ($revRate revs/sec)";
		$this->progress( $summary );
	}

	$lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();
}
276 
/**
 * Writes one line to the stderr handle opened in the constructor.
 *
 * @param string $string Text to write; a newline is appended
 */
private function progress( $string ) {
	$line = $string . "\n";
	fwrite( $this->stderr, $line );
}
280 
/**
 * Opens a dump file and imports it.
 *
 * A stream wrapper prefix is chosen from the file extension (.gz, .bz2
 * or .7z) so that compressed dumps are read directly. If the file cannot
 * be opened, the script ends through fatalError().
 *
 * @param string $filename Path of the dump file
 * @return mixed Whatever importFromHandle() returns
 */
private function importFromFile( $filename ) {
	if ( preg_match( '/\.gz$/', $filename ) ) {
		$filename = 'compress.zlib://' . $filename;
	} elseif ( preg_match( '/\.bz2$/', $filename ) ) {
		$filename = 'compress.bzip2://' . $filename;
	} elseif ( preg_match( '/\.7z$/', $filename ) ) {
		$filename = 'mediawiki.compress.7z://' . $filename;
	}

	$file = fopen( $filename, 'rt' );
	if ( $file === false ) {
		$this->fatalError( error_get_last()['message'] ?? 'Could not open file' );
	}

	try {
		return $this->importFromHandle( $file );
	} finally {
		// Release the handle whether the import succeeded or threw. The
		// is_resource() guard covers a handle that was already closed.
		if ( is_resource( $file ) ) {
			fclose( $file );
		}
	}
}
297 
/**
 * Imports a dump read from standard input.
 *
 * When stdin is a terminal, maybeHelp() is called with $force set to true
 * before the import is attempted.
 *
 * @return mixed Whatever importFromHandle() returns
 */
private function importFromStdin() {
	$stdin = fopen( 'php://stdin', 'rt' );

	$isTerminal = self::posix_isatty( $stdin );
	if ( $isTerminal ) {
		$this->maybeHelp( true );
	}

	return $this->importFromHandle( $stdin );
}
306 
/**
 * Configures a wiki importer for an open stream and runs the import.
 *
 * This starts the timer used for the progress rates, applies the
 * command-line options to the importer, and registers this object's
 * handlers as the importer's callbacks. The values returned when the
 * revision, upload and log item handlers are registered are kept in
 * $importCallback, $uploadCallback and $logItemCallback.
 *
 * @param resource $handle Open stream to read the XML dump from
 * @return mixed Result of the importer's doImport()
 */
private function importFromHandle( $handle ) {
	$this->startTime = microtime( true );

	$source = new ImportStreamSource( $handle );
	$importer = $this->getServiceContainer()
		->getWikiImporterFactory()
		->getWikiImporter( $source );

	// Updating statistics require a lot of time so disable it
	$importer->disableStatisticsUpdate();

	if ( $this->hasOption( 'debug' ) ) {
		$importer->setDebug( true );
	}
	if ( $this->hasOption( 'no-updates' ) ) {
		$importer->setNoUpdates( true );
	}
	if ( $this->hasOption( 'username-prefix' ) ) {
		// 'no-local-users' is only taken into account together with a prefix.
		$importer->setUsernamePrefix(
			$this->getOption( 'username-prefix' ),
			!$this->hasOption( 'no-local-users' )
		);
	}
	if ( $this->hasOption( 'rootpage' ) ) {
		$statusRootPage = $importer->setTargetRootPage( $this->getOption( 'rootpage' ) );
		if ( !$statusRootPage->isGood() ) {
			// Die here so that it doesn't print "Done!"
			$this->fatalError( $statusRootPage->getMessage( false, false, 'en' )->text() );
		}
	}
	if ( $this->hasOption( 'skip-to' ) ) {
		// 'skip-to' is the 1-based number of the first page to import, so 1
		// skips nothing. Clamping keeps a zero, negative or non-numeric
		// value from leaving the page counter below zero.
		$nthPage = max( 1, (int)$this->getOption( 'skip-to' ) );
		$importer->setPageOffset( $nthPage );
		$this->pageCount = $nthPage - 1;
	}
	$importer->setPageCallback( [ $this, 'reportPage' ] );
	$importer->setNoticeCallback( static function ( $msg, $params ) {
		echo wfMessage( $msg, $params )->text() . "\n";
	} );
	$this->importCallback = $importer->setRevisionCallback(
		[ $this, 'handleRevision' ] );
	$this->uploadCallback = $importer->setUploadCallback(
		[ $this, 'handleUpload' ] );
	$this->logItemCallback = $importer->setLogItemCallback(
		[ $this, 'handleLogItem' ] );
	if ( $this->uploads ) {
		$importer->setImportUploads( true );
	}
	if ( $this->imageBasePath ) {
		$importer->setImageBasePath( $this->imageBasePath );
	}

	if ( $this->dryRun ) {
		// A dry run also clears the importer's page-out callback.
		$importer->setPageOutCallback( null );
	}

	return $importer->doImport();
}
365 }
366 
367 $maintClass = BackupReader::class;
368 require_once RUN_MAINTENANCE_IF_MAIN;
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Maintenance script that imports XML dump files into the current wiki.
Definition: importDump.php:36
resource|false $stderr
Definition: importDump.php:54
handleRevision(WikiRevision $rev)
Definition: importDump.php:190
string|false $imageBasePath
Definition: importDump.php:50
float $startTime
Definition: importDump.php:62
callable|null $logItemCallback
Definition: importDump.php:58
callable|null $uploadCallback
Definition: importDump.php:60
array|false $nsFilter
Definition: importDump.php:52
execute()
Do the actual work.
Definition: importDump.php:114
handleLogItem(WikiRevision $rev)
Definition: importDump.php:239
__construct()
Default constructor.
Definition: importDump.php:64
reportPage( $page)
Definition: importDump.php:183
callable|null $importCallback
Definition: importDump.php:56
int $reportingInterval
Definition: importDump.php:38
handleUpload(WikiRevision $revision)
Definition: importDump.php:214
int $uploadCount
Definition: importDump.php:48
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasArg( $argId=0)
Does a given argument exist?
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
maybeHelp( $force=false)
Maybe show the help.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Represents a revision, log entry or upload during the import process.
$maintClass
Definition: importDump.php:367
Represents the target of a wiki link.
Definition: LinkTarget.php:30
$source
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42