MediaWiki  master
importDump.php
Go to the documentation of this file.
1 <?php
29 
30 require_once __DIR__ . '/Maintenance.php';
31 
/**
 * Maintenance script that imports XML dump files into the current wiki.
 *
 * The dump is read either from the file given as the first argument
 * (optionally compressed, selected by file extension) or from stdin. The
 * script registers itself as the page, revision, upload and log item
 * callback of the importer so that it can count items, filter by namespace,
 * print progress and honour --dry-run before delegating to the callbacks the
 * importer handed back.
 */
class BackupReader extends Maintenance {
	/** @var int Progress is reported whenever the page count is a multiple of this */
	public $reportingInterval = 100;

	/** @var int Number of pages seen so far (pre-seeded when --skip-to is used) */
	public $pageCount = 0;

	/** @var int Number of revisions and log items accepted so far */
	public $revCount = 0;

	/** @var bool When true, items are counted but the import callbacks are not invoked */
	public $dryRun = false;

	/** @var bool Whether upload data found in the dump is processed */
	public $uploads = false;

	/** @var int Number of uploads accepted so far */
	protected $uploadCount = 0;

	/** @var string|false Value of --image-base-path, or false when the option is absent */
	public $imageBasePath = false;

	/** @var array|false Namespace indexes to import, or false to import every namespace */
	public $nsFilter = false;

	/** @var resource|false Handle opened on php://stderr; progress lines are written to it */
	public $stderr;

	/** @var callable|null Value returned by the importer's setRevisionCallback() */
	protected $importCallback;

	/** @var callable|null Value returned by the importer's setLogItemCallback() */
	protected $logItemCallback;

	/** @var callable|null Value returned by the importer's setUploadCallback() */
	protected $uploadCallback;

	/** @var float microtime( true ) taken when the import of a handle starts */
	protected $startTime;

	/**
	 * Declare the script description, options and the optional file argument.
	 */
	public function __construct() {
		parent::__construct();

		// Look up the registered stream wrappers once; both checks use the same list.
		$wrappers = stream_get_wrappers();
		$gz = in_array( 'compress.zlib', $wrappers, true )
			? 'ok'
			: '(disabled; requires PHP zlib module)';
		$bz2 = in_array( 'compress.bzip2', $wrappers, true )
			? 'ok'
			: '(disabled; requires PHP bzip2 module)';

		$this->addDescription(
			<<<TEXT
This script reads pages from an XML file as produced from Special:Export or
dumpBackup.php, and saves them into the current wiki.

Compressed XML files may be read directly:
 .gz $gz
 .bz2 $bz2
 .7z (if 7za executable is in PATH)

Note that for very large data sets, importDump.php may be slow; there are
alternate methods which can be much faster for full site restoration:
<https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
TEXT
		);
		$this->stderr = fopen( "php://stderr", "wt" );
		$this->addOption( 'report',
			'Report position and speed after every n pages processed', false, true );
		$this->addOption( 'namespaces',
			'Import only the pages from namespaces belonging to the list of ' .
			'pipe-separated namespace names or namespace indexes', false, true );
		$this->addOption( 'rootpage', 'Pages will be imported as subpages of the specified page',
			false, true );
		$this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
		$this->addOption( 'debug', 'Output extra verbose debug information' );
		$this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
		$this->addOption(
			'no-updates',
			'Disable link table updates. Is faster but leaves the wiki in an inconsistent state'
		);
		$this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
		$this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true );
		$this->addOption( 'username-prefix', 'Prefix for interwiki usernames', false, true );
		$this->addOption( 'no-local-users',
			'Treat all usernames as interwiki. ' .
			'The default is to assign edits to local users where they exist.',
			false, false
		);
		$this->addArg( 'file', 'Dump file to import [else use stdin]', false );
	}

	/**
	 * Read the command line options, run the import and print the closing hints.
	 *
	 * Aborts with a fatal error when the wiki is in read-only mode.
	 */
	public function execute() {
		if ( MediaWikiServices::getInstance()->getReadOnlyMode()->isReadOnly() ) {
			$this->fatalError( "Wiki is in read-only mode; you'll need to disable it for import to work." );
		}

		$this->reportingInterval = intval( $this->getOption( 'report', 100 ) );
		if ( !$this->reportingInterval ) {
			// avoid division by zero
			$this->reportingInterval = 100;
		}

		$this->dryRun = $this->hasOption( 'dry-run' );
		$this->uploads = $this->hasOption( 'uploads' );

		if ( $this->hasOption( 'image-base-path' ) ) {
			$this->imageBasePath = $this->getOption( 'image-base-path' );
		}
		if ( $this->hasOption( 'namespaces' ) ) {
			$this->setNsfilter( explode( '|', $this->getOption( 'namespaces' ) ) );
		}

		if ( $this->hasArg( 0 ) ) {
			$this->importFromFile( $this->getArg( 0 ) );
		} else {
			$this->importFromStdin();
		}

		$this->output( "Done!\n" );
		$this->output( "You might want to run rebuildrecentchanges.php to regenerate RecentChanges,\n" );
		$this->output( "and initSiteStats.php to update page and revision counts\n" );
	}

	/**
	 * Replace the namespace filter.
	 *
	 * @param array $namespaces Namespace names or numeric indexes (as strings);
	 *  an empty array disables filtering
	 */
	private function setNsfilter( array $namespaces ) {
		if ( count( $namespaces ) == 0 ) {
			$this->nsFilter = false;

			return;
		}
		$this->nsFilter = array_unique( array_map( [ $this, 'getNsIndex' ], $namespaces ) );
	}

	/**
	 * Resolve one entry of the --namespaces list to a namespace index.
	 *
	 * The entry is first looked up as a namespace name in the content language.
	 * If that fails, it is accepted as an index only when it is a canonical
	 * integer string and the content language has a name for that index.
	 * Anything else ends the script with a fatal error.
	 *
	 * @param string $namespace
	 * @return int
	 */
	private function getNsIndex( $namespace ) {
		$contLang = MediaWikiServices::getInstance()->getContentLanguage();
		$result = $contLang->getNsIndex( $namespace );
		if ( $result !== false ) {
			return $result;
		}
		$ns = intval( $namespace );
		// The strval() round trip rejects inputs such as "4abc" or "04".
		if ( strval( $ns ) === $namespace && $contLang->getNsText( $ns ) !== false ) {
			return $ns;
		}
		$this->fatalError( "Unknown namespace text / index specified: $namespace" );
	}

	/**
	 * Tell whether an item must be skipped because of the namespace filter.
	 *
	 * @param object|null $title Title of the item (anything exposing getNamespace()),
	 *  or null when the item has no title
	 * @return bool True when a filter is active and the title's namespace is not in it
	 */
	private function skippedNamespace( $title ) {
		if ( $title === null ) {
			// Probably a log entry
			return false;
		}

		$ns = $title->getNamespace();

		// Strict comparison: the filter is built from namespace indexes, so loose
		// type juggling must not make an unrelated value match.
		return is_array( $this->nsFilter ) && !in_array( $ns, $this->nsFilter, true );
	}

	/**
	 * Page callback registered with the importer; only counts the page.
	 *
	 * @param mixed $page Unused
	 */
	public function reportPage( $page ) {
		$this->pageCount++;
	}

	/**
	 * Revision callback registered with the importer.
	 *
	 * Counts the revision, possibly prints progress and, unless this is a dry
	 * run, forwards it to the callback returned by setRevisionCallback().
	 * Revisions without a title or outside the namespace filter are dropped.
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( WikiRevision $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->progress( "Got bogus revision with null title!" );

			return;
		}

		if ( $this->skippedNamespace( $title ) ) {
			return;
		}

		$this->revCount++;
		$this->report();

		if ( !$this->dryRun ) {
			call_user_func( $this->importCallback, $rev );
		}
	}

	/**
	 * Upload callback registered with the importer.
	 *
	 * @param WikiRevision $revision
	 * @return mixed False when uploads are disabled, the namespace is filtered
	 *  out or this is a dry run; otherwise the result of running
	 *  $revision->importUpload() through the primary DB's deadlockLoop()
	 */
	public function handleUpload( WikiRevision $revision ) {
		if ( $this->uploads ) {
			if ( $this->skippedNamespace( $revision->getTitle() ) ) {
				return false;
			}
			$this->uploadCount++;
			// Uploads get their own progress line rather than the periodic report().
			$this->progress( "upload: " . $revision->getFilename() );

			if ( !$this->dryRun ) {
				// Hack: the upload is imported directly through the revision object
				// instead of going through $this->uploadCallback.
				$dbw = $this->getDB( DB_PRIMARY );

				return $dbw->deadlockLoop( [ $revision, 'importUpload' ] );
			}
		}

		return false;
	}

	/**
	 * Log item callback registered with the importer.
	 *
	 * Log items are counted together with revisions in $revCount.
	 *
	 * @param WikiRevision $rev
	 */
	public function handleLogItem( WikiRevision $rev ) {
		if ( $this->skippedNamespace( $rev->getTitle() ) ) {
			return;
		}
		$this->revCount++;
		$this->report();

		if ( !$this->dryRun ) {
			call_user_func( $this->logItemCallback, $rev );
		}
	}

	/**
	 * Print a progress report when one is due.
	 *
	 * A report is due when the page count is a multiple of the reporting
	 * interval; passing $final = true inverts that, so a final report is only
	 * printed when the regular one would not have been.
	 *
	 * @param bool $final
	 */
	private function report( $final = false ) {
		if ( $final xor ( $this->pageCount % $this->reportingInterval == 0 ) ) {
			$this->showReport();
		}
	}

	/**
	 * Print counts and rates (unless quiet), then wait for replication.
	 */
	private function showReport() {
		if ( !$this->mQuiet ) {
			$delta = microtime( true ) - $this->startTime;
			if ( $delta ) {
				$rate = sprintf( "%.2f", $this->pageCount / $delta );
				$revrate = sprintf( "%.2f", $this->revCount / $delta );
			} else {
				// No measurable time has passed, so a rate cannot be computed.
				$rate = '-';
				$revrate = '-';
			}
			# Logs dumps don't have page tallies
			if ( $this->pageCount ) {
				$this->progress( "$this->pageCount ($rate pages/sec $revrate revs/sec)" );
			} else {
				$this->progress( "$this->revCount ($revrate revs/sec)" );
			}
		}
		// Runs even in quiet mode: every report point also waits for replication.
		MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
	}

	/**
	 * Write one line to the stderr handle opened in the constructor.
	 *
	 * @param string $string Text without trailing newline
	 */
	private function progress( $string ) {
		fwrite( $this->stderr, $string . "\n" );
	}

	/**
	 * Import a dump from a file, picking a decompression stream wrapper from
	 * the file extension (.gz, .bz2, .7z).
	 *
	 * @param string $filename
	 * @return mixed Result of importFromHandle()
	 */
	private function importFromFile( $filename ) {
		if ( preg_match( '/\.gz$/', $filename ) ) {
			$filename = 'compress.zlib://' . $filename;
		} elseif ( preg_match( '/\.bz2$/', $filename ) ) {
			$filename = 'compress.bzip2://' . $filename;
		} elseif ( preg_match( '/\.7z$/', $filename ) ) {
			$filename = 'mediawiki.compress.7z://' . $filename;
		}

		$file = fopen( $filename, 'rt' );
		if ( $file === false ) {
			$this->fatalError( error_get_last()['message'] ?? 'Could not open file' );
		}

		return $this->importFromHandle( $file );
	}

	/**
	 * Import a dump from stdin; shows the help text when stdin is a terminal.
	 *
	 * @return mixed Result of importFromHandle()
	 */
	private function importFromStdin() {
		$file = fopen( 'php://stdin', 'rt' );
		// Fail the same way importFromFile() does instead of passing false on.
		if ( $file === false ) {
			$this->fatalError( error_get_last()['message'] ?? 'Could not open stdin' );
		}
		if ( self::posix_isatty( $file ) ) {
			$this->maybeHelp( true );
		}

		return $this->importFromHandle( $file );
	}

	/**
	 * Configure an importer for the given stream from the command line
	 * options, hook this object's callbacks into it and run the import.
	 *
	 * @param resource $handle Open, readable stream containing the XML dump
	 * @return mixed Whatever the importer's doImport() returns
	 */
	private function importFromHandle( $handle ) {
		$this->startTime = microtime( true );

		$source = new ImportStreamSource( $handle );
		$importer = MediaWikiServices::getInstance()
			->getWikiImporterFactory()
			->getWikiImporter( $source );

		// Updating statistics require a lot of time so disable it
		$importer->disableStatisticsUpdate();

		if ( $this->hasOption( 'debug' ) ) {
			$importer->setDebug( true );
		}
		if ( $this->hasOption( 'no-updates' ) ) {
			$importer->setNoUpdates( true );
		}
		if ( $this->hasOption( 'username-prefix' ) ) {
			$importer->setUsernamePrefix(
				$this->getOption( 'username-prefix' ),
				!$this->hasOption( 'no-local-users' )
			);
		}
		if ( $this->hasOption( 'rootpage' ) ) {
			$statusRootPage = $importer->setTargetRootPage( $this->getOption( 'rootpage' ) );
			if ( !$statusRootPage->isGood() ) {
				// Die here so that it doesn't print "Done!"
				$this->fatalError( $statusRootPage->getMessage( false, false, 'en' )->text() );
			}
		}
		if ( $this->hasOption( 'skip-to' ) ) {
			$nthPage = (int)$this->getOption( 'skip-to' );
			$importer->setPageOffset( $nthPage );
			// Seed the counter with the number of skipped pages, but never below
			// zero: "--skip-to 0" (or a non-numeric value) would otherwise start
			// at -1 and make every reported page count one too low.
			$this->pageCount = max( 0, $nthPage - 1 );
		}
		$importer->setPageCallback( [ $this, 'reportPage' ] );
		$importer->setNoticeCallback( static function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );
		// Each setter's return value is kept so the handlers can delegate to it.
		$this->importCallback = $importer->setRevisionCallback(
			[ $this, 'handleRevision' ] );
		$this->uploadCallback = $importer->setUploadCallback(
			[ $this, 'handleUpload' ] );
		$this->logItemCallback = $importer->setLogItemCallback(
			[ $this, 'handleLogItem' ] );
		if ( $this->uploads ) {
			$importer->setImportUploads( true );
		}
		if ( $this->imageBasePath ) {
			$importer->setImageBasePath( $this->imageBasePath );
		}

		if ( $this->dryRun ) {
			$importer->setPageOutCallback( null );
		}

		return $importer->doImport();
	}
}
367 
// Name the class that implements this script, then include the file the
// RUN_MAINTENANCE_IF_MAIN constant points to (defined outside this file).
$maintClass = BackupReader::class;
require_once RUN_MAINTENANCE_IF_MAIN;
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Maintenance script that imports XML dump files into the current wiki.
Definition: importDump.php:37
resource false $stderr
Definition: importDump.php:55
handleRevision(WikiRevision $rev)
Definition: importDump.php:192
string false $imageBasePath
Definition: importDump.php:51
float $startTime
Definition: importDump.php:63
callable null $logItemCallback
Definition: importDump.php:59
callable null $uploadCallback
Definition: importDump.php:61
array false $nsFilter
Definition: importDump.php:53
execute()
Do the actual work.
Definition: importDump.php:115
handleLogItem(WikiRevision $rev)
Definition: importDump.php:240
__construct()
Default constructor.
Definition: importDump.php:65
reportPage( $page)
Definition: importDump.php:185
callable null $importCallback
Definition: importDump.php:57
int $reportingInterval
Definition: importDump.php:39
handleUpload(WikiRevision $revision)
Definition: importDump.php:216
int $uploadCount
Definition: importDump.php:49
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasArg( $argId=0)
Does a given argument exist?
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
maybeHelp( $force=false)
Maybe show the help.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Service locator for MediaWiki core services.
Represents a revision, log entry or upload during the import process.
$maintClass
Definition: importDump.php:368
$source
const DB_PRIMARY
Definition: defines.php:28
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42