69 parent::__construct();
70 $gz = in_array(
'compress.zlib', stream_get_wrappers() )
72 :
'(disabled; requires PHP zlib module)';
73 $bz2 = in_array(
'compress.bzip2', stream_get_wrappers() )
75 :
'(disabled; requires PHP bzip2 module)';
79This script reads pages from an XML file as produced from Special:Export or
80dumpBackup.php, and saves them into the current wiki.
82Compressed XML files may be read directly:
85 .7z (
if 7za executable is in PATH)
87Note that
for very large data sets, importDump.php may be slow; there are
88alternate methods which can be much faster
for full site restoration:
92 $this->stderr = fopen(
"php://stderr",
"wt" );
94 'Report position and speed after every n pages processed',
false,
true );
96 'Import only the pages from namespaces belonging to the list of ' .
97 'pipe-separated namespace names or namespace indexes',
false,
true );
98 $this->
addOption(
'rootpage',
'Pages will be imported as subpages of the specified page',
100 $this->
addOption(
'dry-run',
'Parse dump without actually importing pages' );
101 $this->
addOption(
'debug',
'Output extra verbose debug information' );
102 $this->
addOption(
'uploads',
'Process file upload data if included (experimental)' );
105 'Disable link table updates. Is faster but leaves the wiki in an inconsistent state'
107 $this->
addOption(
'image-base-path',
'Import files from a specified path',
false,
true );
108 $this->
addOption(
'skip-to',
'Start from nth page by skipping first n-1 pages',
false,
true );
110 'Prefix for interwiki usernames; a trailing ">" will be added. Default: "imported>"',
113 'Treat all usernames as interwiki. ' .
114 'The default is to assign edits to local users where they exist.',
117 $this->
addArg(
'file',
'Dump file to import [else use stdin]',
false );
122 $this->
fatalError(
"Wiki is in read-only mode; you'll need to disable it for import to work." );
125 $this->reportingInterval = intval( $this->
getOption(
'report', 100 ) );
126 if ( !$this->reportingInterval ) {
128 $this->reportingInterval = 100;
131 $this->dryRun = $this->
hasOption(
'dry-run' );
132 $this->uploads = $this->
hasOption(
'uploads' );
134 if ( $this->
hasOption(
'image-base-path' ) ) {
135 $this->imageBasePath = $this->
getOption(
'image-base-path' );
137 if ( $this->
hasOption(
'namespaces' ) ) {
138 $this->setNsfilter( explode(
'|', $this->
getOption(
'namespaces' ) ) );
141 if ( $this->
hasArg( 0 ) ) {
142 $this->importFromFile( $this->
getArg( 0 ) );
144 $this->importFromStdin();
147 $this->
output(
"Done!\n" );
148 $this->
output(
"You might want to run rebuildrecentchanges.php to regenerate RecentChanges,\n" );
149 $this->
output(
"and initSiteStats.php to update page and revision counts\n" );
/**
 * Store the namespace filter used to decide which pages get imported.
 *
 * An empty list stores false, which means "no filtering". Otherwise each
 * entry is passed through getNsIndex() and the de-duplicated results are
 * stored as the filter.
 *
 * @param array $namespaces Namespace names and/or namespace indexes
 */
private function setNsfilter( array $namespaces ) {
	$this->nsFilter = $namespaces === []
		? false
		: array_unique( array_map( [ $this, 'getNsIndex' ], $namespaces ) );
}
161 private function getNsIndex( $namespace ) {
163 $result = $contLang->getNsIndex( $namespace );
164 if ( $result !==
false ) {
167 $ns = intval( $namespace );
168 if ( strval( $ns ) === $namespace && $contLang->getNsText( $ns ) !==
false ) {
171 $this->
fatalError(
"Unknown namespace text / index specified: $namespace" );
178 private function skippedNamespace( $title ) {
179 if ( $title ===
null ) {
184 $ns = $title->getNamespace();
186 return is_array( $this->nsFilter ) && !in_array( $ns, $this->nsFilter );
199 $this->progress(
"Got bogus revision with null title!" );
204 if ( $this->skippedNamespace( $title ) ) {
211 if ( !$this->dryRun ) {
212 call_user_func( $this->importCallback, $rev );
221 if ( $this->uploads ) {
222 if ( $this->skippedNamespace( $revision->
getTitle() ) ) {
225 $this->uploadCount++;
227 $this->progress(
"upload: " . $revision->
getFilename() );
229 if ( !$this->dryRun ) {
233 $statusValue = $importer->import( $revision );
235 return $statusValue->isGood();
246 if ( $this->skippedNamespace( $rev->
getTitle() ) ) {
252 if ( !$this->dryRun ) {
253 call_user_func( $this->logItemCallback, $rev );
257 private function report( $final =
false ) {
258 if ( $final xor ( $this->pageCount % $this->reportingInterval == 0 ) ) {
263 private function showReport() {
264 if ( !$this->mQuiet ) {
267 $rate = sprintf(
"%.2f", $this->pageCount / $delta );
268 $revrate = sprintf(
"%.2f", $this->revCount / $delta );
273 # Logs dumps don't have page tallies
274 if ( $this->pageCount ) {
275 $this->progress(
"$this->pageCount ($rate pages/sec $revrate revs/sec)" );
277 $this->progress(
"$this->revCount ($revrate revs/sec)" );
/**
 * Emit one line of progress information on the script's stderr handle.
 *
 * @param string $string Message to write; a newline is appended
 */
private function progress( $string ) {
	$line = $string . "\n";
	fwrite( $this->stderr, $line );
}
/**
 * Open a dump file and import its contents.
 *
 * The file name suffix selects a stream wrapper so that compressed dumps
 * can be read directly; any other name is opened as-is. If the file cannot
 * be opened the script is terminated via fatalError().
 *
 * @param string $filename Path of the dump file
 * @return mixed Result of importFromHandle()
 */
private function importFromFile( $filename ) {
	// Suffix pattern => stream wrapper prefix. The patterns cannot overlap,
	// so at most one prefix is ever applied.
	$wrapperBySuffix = [
		'/\.gz$/' => 'compress.zlib://',
		'/\.bz2$/' => 'compress.bzip2://',
		'/\.7z$/' => 'mediawiki.compress.7z://',
	];
	foreach ( $wrapperBySuffix as $suffixPattern => $wrapper ) {
		if ( preg_match( $suffixPattern, $filename ) ) {
			$filename = $wrapper . $filename;
			break;
		}
	}

	$handle = fopen( $filename, 'rt' );
	if ( $handle === false ) {
		// Prefer PHP's own reason for the failure when one was recorded.
		$this->fatalError( error_get_last()['message'] ?? 'Could not open file' );
	}

	return $this->importFromHandle( $handle );
}
304 private function importFromStdin() {
305 $file = fopen(
'php://stdin',
'rt' );
306 if ( self::posix_isatty( $file ) ) {
310 return $this->importFromHandle( $file );
313 private function importFromHandle( $handle ) {
314 $this->startTime = microtime(
true );
316 $user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [
'steal' =>
true ] );
320 ->getWikiImporterFactory()
324 $importer->disableStatisticsUpdate();
327 $importer->setDebug(
true );
329 if ( $this->
hasOption(
'no-updates' ) ) {
330 $importer->setNoUpdates(
true );
332 $importer->setUsernamePrefix(
333 $this->
getOption(
'username-prefix',
'imported' ),
337 $statusRootPage = $importer->setTargetRootPage( $this->
getOption(
'rootpage' ) );
338 if ( !$statusRootPage->isGood() ) {
344 $nthPage = (int)$this->
getOption(
'skip-to' );
345 $importer->setPageOffset( $nthPage );
346 $this->pageCount = $nthPage - 1;
348 $importer->setPageCallback( [ $this,
'reportPage' ] );
349 $importer->setNoticeCallback(
static function ( $msg,
$params ) {
352 $this->importCallback = $importer->setRevisionCallback(
353 [ $this,
'handleRevision' ] );
354 $this->uploadCallback = $importer->setUploadCallback(
355 [ $this,
'handleUpload' ] );
356 $this->logItemCallback = $importer->setLogItemCallback(
357 [ $this,
'handleLogItem' ] );
358 if ( $this->uploads ) {
359 $importer->setImportUploads(
true );
361 if ( $this->imageBasePath ) {
362 $importer->setImageBasePath( $this->imageBasePath );
365 if ( $this->dryRun ) {
366 $importer->setPageOutCallback(
null );
369 return $importer->doImport();
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasArg( $argId=0)
Does a given argument exist?
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.