MediaWiki fundraising/REL1_35
importDump.php
Go to the documentation of this file.
1<?php
29
30require_once __DIR__ . '/Maintenance.php';
31
/** @var int Number of pages between two progress reports */
public $reportingInterval = 100;
/** @var int Pages counted so far; incremented by reportPage() */
public $pageCount = 0;
/** @var int Revisions and log items counted so far */
public $revCount = 0;
/** @var bool True when the dump is only parsed and the import callbacks are not invoked */
public $dryRun = false;
/** @var bool True when upload data contained in the dump is processed */
public $uploads = false;
/** @var int Uploads counted so far */
protected $uploadCount = 0;
/** Value of the image-base-path option; false when the option was not given */
public $imageBasePath = false;
/** @var array|false Namespace indexes to import; false disables filtering */
public $nsFilter = false;
/** @var bool|resource Handle on php://stderr, opened in the constructor */
public $stderr;
/** @var callable|null Value returned by WikiImporter::setRevisionCallback() */
protected $importCallback;
/**
 * Declared explicitly: it is assigned in importFromHandle() and read in
 * handleLogItem(), and would otherwise be created as a dynamic property.
 *
 * @var callable|null Value returned by WikiImporter::setLogItemCallback()
 */
protected $logItemCallback;
/** @var callable|null Value returned by WikiImporter::setUploadCallback() */
protected $uploadCallback;
/** @var float|null microtime( true ) taken when importFromHandle() starts */
protected $startTime;
57
/**
 * Registers the script description, its command-line options and the
 * optional dump-file argument, and opens the stderr handle that progress()
 * writes to.
 */
public function __construct() {
	parent::__construct();

	// The help text states whether each compressed format is readable here.
	$wrappers = stream_get_wrappers();
	$gz = 'ok';
	if ( !in_array( 'compress.zlib', $wrappers ) ) {
		$gz = '(disabled; requires PHP zlib module)';
	}
	$bz2 = 'ok';
	if ( !in_array( 'compress.bzip2', $wrappers ) ) {
		$bz2 = '(disabled; requires PHP bzip2 module)';
	}

	$description = <<<TEXT
This script reads pages from an XML file as produced from Special:Export or
dumpBackup.php, and saves them into the current wiki.

Compressed XML files may be read directly:
 .gz $gz
 .bz2 $bz2
 .7z (if 7za executable is in PATH)

Note that for very large data sets, importDump.php may be slow; there are
alternate methods which can be much faster for full site restoration:
<https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
TEXT;
	$this->addDescription( $description );

	$this->stderr = fopen( "php://stderr", "wt" );

	// Options that take a value (third argument: not required, fourth: with argument).
	$this->addOption(
		'report',
		'Report position and speed after every n pages processed',
		false,
		true
	);
	$this->addOption(
		'namespaces',
		'Import only the pages from namespaces belonging to the list of ' .
			'pipe-separated namespace names or namespace indexes',
		false,
		true
	);
	$this->addOption(
		'rootpage',
		'Pages will be imported as subpages of the specified page',
		false,
		true
	);

	// Plain switches.
	$this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
	$this->addOption( 'debug', 'Output extra verbose debug information' );
	$this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
	$this->addOption(
		'no-updates',
		'Disable link table updates. Is faster but leaves the wiki in an inconsistent state'
	);

	$this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
	$this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true );
	$this->addOption( 'username-prefix', 'Prefix for interwiki usernames', false, true );
	$this->addOption(
		'no-local-users',
		'Treat all usernames as interwiki. ' .
			'The default is to assign edits to local users where they exist.',
		false,
		false
	);

	$this->addArg( 'file', 'Dump file to import [else use stdin]', false );
}
107
/**
 * Runs the import: aborts on a read-only wiki, copies the relevant options
 * into properties, imports from the file argument (or stdin when no argument
 * was given) and finally prints follow-up advice.
 */
public function execute() {
	if ( wfReadOnly() ) {
		$this->fatalError( "Wiki is in read-only mode; you'll need to disable it for import to work." );
	}

	// report() takes pageCount modulo this value, so zero must never be stored.
	$interval = intval( $this->getOption( 'report', 100 ) );
	$this->reportingInterval = $interval ?: 100;

	$this->dryRun = $this->hasOption( 'dry-run' );
	$this->uploads = $this->hasOption( 'uploads' );

	if ( $this->hasOption( 'image-base-path' ) ) {
		$this->imageBasePath = $this->getOption( 'image-base-path' );
	}
	if ( $this->hasOption( 'namespaces' ) ) {
		$requested = explode( '|', $this->getOption( 'namespaces' ) );
		$this->setNsfilter( $requested );
	}

	if ( !$this->hasArg( 0 ) ) {
		$this->importFromStdin();
	} else {
		$this->importFromFile( $this->getArg( 0 ) );
	}

	$closingLines = [
		"Done!\n",
		"You might want to run rebuildrecentchanges.php to regenerate RecentChanges,\n",
		"and initSiteStats.php to update page and revision counts\n",
	];
	foreach ( $closingLines as $line ) {
		$this->output( $line );
	}
}
139
/**
 * Stores the namespace filter derived from a list of names or indexes.
 *
 * @param array $namespaces Namespace names and/or numeric indexes; an empty
 *  list switches filtering off
 */
private function setNsfilter( array $namespaces ) {
	$this->nsFilter = $namespaces === []
		? false
		// Each entry is resolved by getNsIndex(); duplicates are dropped.
		: array_unique( array_map( [ $this, 'getNsIndex' ], $namespaces ) );
}
148
/**
 * Resolves one entry of the namespaces option to a namespace index.
 *
 * The entry is first looked up as a namespace name in the content language.
 * Failing that, it is accepted as an index when it is the exact string form
 * of an integer for which the content language knows a namespace text.
 * Anything else ends the script through fatalError().
 *
 * @param string $namespace Namespace name or numeric index, as typed
 * @return int Namespace index
 */
private function getNsIndex( $namespace ) {
	$lang = MediaWikiServices::getInstance()->getContentLanguage();

	$byName = $lang->getNsIndex( $namespace );
	if ( $byName !== false ) {
		return $byName;
	}

	$asInt = intval( $namespace );
	// Rejects inputs such as "4x" or " 4" that intval() would silently truncate.
	$isPlainNumber = strval( $asInt ) === $namespace;
	if ( $isPlainNumber && $lang->getNsText( $asInt ) !== false ) {
		return $asInt;
	}

	$this->fatalError( "Unknown namespace text / index specified: $namespace" );
}
161
/**
 * Tells whether an item is excluded by the namespace filter.
 *
 * @param object|null $title Title of the item; null is never skipped
 *  (probably a log entry)
 * @return bool True when a filter is set and the title's namespace is not in it
 */
private function skippedNamespace( $title ) {
	if ( $title === null ) {
		return false;
	}

	$namespace = $title->getNamespace();
	if ( !is_array( $this->nsFilter ) ) {
		// No filter configured: everything is imported.
		return false;
	}

	return !in_array( $namespace, $this->nsFilter );
}
177
/**
 * Page callback registered with the importer; counts one more page.
 *
 * @param mixed $page Unused
 */
public function reportPage( $page ) {
	++$this->pageCount;
}
181
/**
 * Revision callback registered with the importer.
 *
 * Revisions without a title are reported on stderr and dropped; revisions
 * outside the namespace filter are dropped silently. Every other revision is
 * counted and, unless this is a dry run, passed on to the stored import
 * callback.
 *
 * @param WikiRevision $rev
 */
public function handleRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();
	if ( !$title ) {
		$this->progress( "Got bogus revision with null title!" );

		return;
	}

	$wanted = !$this->skippedNamespace( $title );
	if ( $wanted ) {
		$this->revCount += 1;
		$this->report();

		if ( !$this->dryRun ) {
			call_user_func( $this->importCallback, $rev );
		}
	}
}
204
/**
 * Upload callback registered with the importer.
 *
 * Does nothing unless uploads are enabled and the title passes the namespace
 * filter. Otherwise the upload is counted and announced on stderr, and,
 * outside a dry run, imported by calling importUpload() on the revision
 * inside the database's deadlockLoop(). The stored $uploadCallback is not
 * invoked here.
 *
 * @param WikiRevision $revision
 * @return mixed False when nothing was imported, otherwise the result of
 *  deadlockLoop()
 */
public function handleUpload( WikiRevision $revision ) {
	if ( !$this->uploads || $this->skippedNamespace( $revision->getTitle() ) ) {
		return false;
	}

	$this->uploadCount++;
	$this->progress( "upload: " . $revision->getFilename() );

	if ( $this->dryRun ) {
		return false;
	}

	$dbw = $this->getDB( DB_MASTER );

	return $dbw->deadlockLoop( [ $revision, 'importUpload' ] );
}
229
/**
 * Log item callback registered with the importer.
 *
 * Items outside the namespace filter are dropped. Every other item is
 * counted in revCount and, unless this is a dry run, passed on to the stored
 * log item callback.
 *
 * @param WikiRevision $rev
 */
public function handleLogItem( WikiRevision $rev ) {
	$wanted = !$this->skippedNamespace( $rev->getTitle() );
	if ( $wanted ) {
		$this->revCount += 1;
		$this->report();

		if ( !$this->dryRun ) {
			call_user_func( $this->logItemCallback, $rev );
		}
	}
}
244
/**
 * Shows a progress report when exactly one of these holds: the caller asked
 * for a final report, or the page count is a multiple of the reporting
 * interval.
 *
 * @param bool $final
 */
private function report( $final = false ) {
	$onInterval = ( $this->pageCount % $this->reportingInterval == 0 );

	// Same truth table as "xor": report when the two flags disagree.
	if ( (bool)$final !== $onInterval ) {
		$this->showReport();
	}
}
250
/**
 * Writes the current throughput to stderr (unless the script is quiet) and
 * then waits for replication.
 */
private function showReport() {
	if ( !$this->mQuiet ) {
		$elapsed = microtime( true ) - $this->startTime;

		// With no measurable elapsed time a rate cannot be computed.
		$rate = $elapsed ? sprintf( "%.2f", $this->pageCount / $elapsed ) : '-';
		$revrate = $elapsed ? sprintf( "%.2f", $this->revCount / $elapsed ) : '-';

		// Logs dumps don't have page tallies
		$line = $this->pageCount
			? "$this->pageCount ($rate pages/sec $revrate revs/sec)"
			: "$this->revCount ($revrate revs/sec)";
		$this->progress( $line );
	}

	// Runs even in quiet mode.
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();
}
270
/**
 * Writes one line to the stderr handle opened in the constructor.
 *
 * @param string $string Text to write; a newline is appended
 */
private function progress( $string ) {
	$line = $string . "\n";
	fwrite( $this->stderr, $line );
}
274
/**
 * Opens a dump file and imports it.
 *
 * A file name ending in .gz, .bz2 or .7z is opened through the matching
 * decompressing stream wrapper.
 *
 * @param string $filename Path of the dump file
 * @return mixed Result of importFromHandle()
 */
private function importFromFile( $filename ) {
	if ( preg_match( '/\.gz$/', $filename ) ) {
		$filename = 'compress.zlib://' . $filename;
	} elseif ( preg_match( '/\.bz2$/', $filename ) ) {
		$filename = 'compress.bzip2://' . $filename;
	} elseif ( preg_match( '/\.7z$/', $filename ) ) {
		$filename = 'mediawiki.compress.7z://' . $filename;
	}

	$file = fopen( $filename, 'rt' );
	if ( $file === false ) {
		// fopen() only raises a warning on failure. Stop here instead of
		// handing false to the importer as if it were a stream.
		$this->fatalError( "Could not open dump file for reading: $filename" );
	}

	return $this->importFromHandle( $file );
}
288
/**
 * Imports a dump read from standard input.
 *
 * When stdin is a terminal, maybeHelp() is called with its force flag set
 * before the import is attempted.
 *
 * @return mixed Result of importFromHandle()
 */
private function importFromStdin() {
	$stdin = fopen( 'php://stdin', 'rt' );

	$isTerminal = self::posix_isatty( $stdin );
	if ( $isTerminal ) {
		$this->maybeHelp( true );
	}

	return $this->importFromHandle( $stdin );
}
297
/**
 * Configures a WikiImporter for an already opened stream and runs it.
 *
 * Records the start time used by showReport(), applies the command-line
 * options to the importer, registers this object's handlers as the
 * importer's callbacks (keeping the values the setters return so the
 * handlers can forward to them) and starts the import.
 *
 * @param resource $handle Open, readable stream containing the XML dump
 * @return mixed Result of WikiImporter::doImport(), or false when the root
 *  page option is rejected
 */
private function importFromHandle( $handle ) {
	$this->startTime = microtime( true );

	$importer = new WikiImporter( new ImportStreamSource( $handle ), $this->getConfig() );

	// Updating statistics require a lot of time so disable it
	$importer->disableStatisticsUpdate();

	if ( $this->hasOption( 'debug' ) ) {
		$importer->setDebug( true );
	}
	if ( $this->hasOption( 'no-updates' ) ) {
		$importer->setNoUpdates( true );
	}
	if ( $this->hasOption( 'username-prefix' ) ) {
		$prefix = $this->getOption( 'username-prefix' );
		$assignLocalUsers = !$this->hasOption( 'no-local-users' );
		$importer->setUsernamePrefix( $prefix, $assignLocalUsers );
	}
	if ( $this->hasOption( 'rootpage' ) ) {
		$rootStatus = $importer->setTargetRootPage( $this->getOption( 'rootpage' ) );
		if ( !$rootStatus->isGood() ) {
			// Die here so that it doesn't print "Done!"
			$this->fatalError( $rootStatus->getMessage( false, false, 'en' )->text() );
			return false;
		}
	}
	if ( $this->hasOption( 'skip-to' ) ) {
		$skipTo = (int)$this->getOption( 'skip-to' );
		$importer->setPageOffset( $skipTo );
		// Makes the page counter continue from the skipped position.
		$this->pageCount = $skipTo - 1;
	}

	$importer->setPageCallback( [ $this, 'reportPage' ] );
	$importer->setNoticeCallback( function ( $msg, $params ) {
		echo wfMessage( $msg, $params )->text() . "\n";
	} );

	// Each setter's return value is stored for the handlers to forward to.
	$this->importCallback = $importer->setRevisionCallback( [ $this, 'handleRevision' ] );
	$this->uploadCallback = $importer->setUploadCallback( [ $this, 'handleUpload' ] );
	$this->logItemCallback = $importer->setLogItemCallback( [ $this, 'handleLogItem' ] );

	if ( $this->uploads ) {
		$importer->setImportUploads( true );
	}
	if ( $this->imageBasePath ) {
		$importer->setImageBasePath( $this->imageBasePath );
	}
	if ( $this->dryRun ) {
		$importer->setPageOutCallback( null );
	}

	return $importer->doImport();
}
355}
356
// Script footer: names the maintenance class defined in this file, then
// includes the file referenced by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that include runs the class when this script is
// invoked directly — confirm in Maintenance.php.
357$maintClass = BackupReader::class;
358require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
wfReadOnly()
Check whether the wiki is in read-only mode.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
const RUN_MAINTENANCE_IF_MAIN
Maintenance script that imports XML dump files into the current wiki.
handleRevision(WikiRevision $rev)
getNsIndex( $namespace)
importFromFile( $filename)
callable null $logItemCallback
callable null $uploadCallback
array false $nsFilter
execute()
Do the actual work.
handleLogItem(WikiRevision $rev)
skippedNamespace( $title)
bool resource $stderr
report( $final=false)
__construct()
Default constructor.
reportPage( $page)
importFromHandle( $handle)
progress( $string)
setNsfilter(array $namespaces)
callable null $importCallback
handleUpload(WikiRevision $revision)
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasArg( $argId=0)
Does a given argument exist?
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
maybeHelp( $force=false)
Maybe show the help.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
XML file reader for the page data importer.
Represents a revision, log entry or upload during the import process.
$maintClass
$source
const DB_MASTER
Definition defines.php:29
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition router.php:42