MediaWiki REL1_33
importDump.php
Go to the documentation of this file.
1<?php
28
29require_once __DIR__ . '/Maintenance.php';
30
/** @var int Number of pages between two progress reports (see report()) */
public $reportingInterval = 100;
/** @var int Pages counted so far; incremented by reportPage() */
public $pageCount = 0;
/** @var int Revisions and log items counted so far */
public $revCount = 0;
/** @var bool When true, the dump is parsed but nothing is handed to the import callbacks */
public $dryRun = false;
/** @var bool Whether upload records in the dump are processed (experimental) */
public $uploads = false;
/** @var int Upload records counted so far; incremented by handleUpload() */
protected $uploadCount = 0;
/** @var string|false Value of --image-base-path, or false when the option was not given */
public $imageBasePath = false;
/** @var int[]|false Namespace indexes to import, or false to import every namespace */
public $nsFilter = false;

// The properties below used to be created on the fly by assignment. They are
// declared explicitly because dynamic properties are deprecated as of PHP 8.2.
// They stay public, which is the visibility dynamic properties have, so code
// outside this class that touches them keeps working.

/** @var resource|false|null Handle progress() writes to; opened in the constructor */
public $stderr;
/** @var float|null microtime( true ) at the moment the import started */
public $startTime;
/** @var mixed Value returned by the importer's setRevisionCallback(); invoked by handleRevision() */
public $importCallback;
/** @var mixed Value returned by the importer's setUploadCallback(); stored but not invoked in this class */
public $uploadCallback;
/** @var mixed Value returned by the importer's setLogItemCallback(); invoked by handleLogItem() */
public $logItemCallback;
45
/**
 * Set up the script: description text, the stderr handle used for progress
 * messages, and every command line option and argument the script accepts.
 */
function __construct() {
	parent::__construct();

	// Look at the registered stream wrappers once; the result tells the help
	// text whether .gz / .bz2 dumps can be read directly.
	$wrappers = stream_get_wrappers();
	if ( in_array( 'compress.zlib', $wrappers, true ) ) {
		$gz = 'ok';
	} else {
		$gz = '(disabled; requires PHP zlib module)';
	}
	if ( in_array( 'compress.bzip2', $wrappers, true ) ) {
		$bz2 = 'ok';
	} else {
		$bz2 = '(disabled; requires PHP bzip2 module)';
	}

	$this->addDescription(
		<<<TEXT
This script reads pages from an XML file as produced from Special:Export or
dumpBackup.php, and saves them into the current wiki.

Compressed XML files may be read directly:
 .gz $gz
 .bz2 $bz2
 .7z (if 7za executable is in PATH)

Note that for very large data sets, importDump.php may be slow; there are
alternate methods which can be much faster for full site restoration:
<https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
TEXT
	);

	// progress() writes to this handle rather than to standard output.
	$this->stderr = fopen( "php://stderr", "wt" );

	// Option name => [ help text, does the option take a value? ].
	// None of the options is required.
	$options = [
		'report' => [
			'Report position and speed after every n pages processed',
			true
		],
		'namespaces' => [
			'Import only the pages from namespaces belonging to the list of ' .
				'pipe-separated namespace names or namespace indexes',
			true
		],
		'rootpage' => [
			'Pages will be imported as subpages of the specified page',
			true
		],
		'dry-run' => [
			'Parse dump without actually importing pages',
			false
		],
		'debug' => [
			'Output extra verbose debug information',
			false
		],
		'uploads' => [
			'Process file upload data if included (experimental)',
			false
		],
		'no-updates' => [
			'Disable link table updates. Is faster but leaves the wiki in an inconsistent state',
			false
		],
		'image-base-path' => [
			'Import files from a specified path',
			true
		],
		'skip-to' => [
			'Start from nth page by skipping first n-1 pages',
			true
		],
		'username-prefix' => [
			'Prefix for interwiki usernames',
			true
		],
		'no-local-users' => [
			'Treat all usernames as interwiki. ' .
				'The default is to assign edits to local users where they exist.',
			false
		],
	];
	foreach ( $options as $name => list( $description, $withArg ) ) {
		$this->addOption( $name, $description, false, $withArg );
	}

	$this->addArg( 'file', 'Dump file to import [else use stdin]', false );
}
95
/**
 * Entry point: read the command line options into the instance state, run
 * the import from the given file (or from stdin when no file argument is
 * present) and print the closing hints.
 */
public function execute() {
	if ( wfReadOnly() ) {
		$this->fatalError( "Wiki is in read-only mode; you'll need to disable it for import to work." );
	}

	// report() divides by this value, so a zero (or non-numeric) --report
	// falls back to the default of 100.
	$interval = intval( $this->getOption( 'report', 100 ) );
	$this->reportingInterval = $interval ?: 100;

	$this->dryRun = $this->hasOption( 'dry-run' );
	// experimental!
	$this->uploads = $this->hasOption( 'uploads' );

	if ( $this->hasOption( 'image-base-path' ) ) {
		$this->imageBasePath = $this->getOption( 'image-base-path' );
	}
	if ( $this->hasOption( 'namespaces' ) ) {
		$requested = explode( '|', $this->getOption( 'namespaces' ) );
		$this->setNsfilter( $requested );
	}

	if ( !$this->hasArg( 0 ) ) {
		$this->importFromStdin();
	} else {
		$this->importFromFile( $this->getArg( 0 ) );
	}

	$closingLines = [
		"Done!\n",
		"You might want to run rebuildrecentchanges.php to regenerate RecentChanges,\n",
		"and initSiteStats.php to update page and revision counts\n",
	];
	foreach ( $closingLines as $line ) {
		$this->output( $line );
	}
}
125
127 if ( count( $namespaces ) == 0 ) {
128 $this->nsFilter = false;
129
130 return;
131 }
132 $this->nsFilter = array_unique( array_map( [ $this, 'getNsIndex' ], $namespaces ) );
133 }
134
/**
 * Turn one entry of the --namespaces list into a namespace index.
 *
 * The entry is first looked up as a namespace name in the content language.
 * If that fails it is accepted as a numeric index, provided it is written
 * exactly as an integer and the content language has a name for that index.
 * Anything else ends the script through fatalError().
 *
 * @param string $namespace Namespace name or numeric index as typed by the user
 * @return int
 */
private function getNsIndex( $namespace ) {
	$lang = MediaWikiServices::getInstance()->getContentLanguage();

	$byName = $lang->getNsIndex( $namespace );
	if ( $byName !== false ) {
		return $byName;
	}

	$candidate = intval( $namespace );
	// Only accept strings that round-trip through intval(), e.g. "4" but not "4x".
	$isPlainInteger = strval( $candidate ) === $namespace;
	if ( $isPlainInteger && $lang->getNsText( $candidate ) !== false ) {
		return $candidate;
	}

	$this->fatalError( "Unknown namespace text / index specified: $namespace" );
}
147
/**
 * Tell whether an object lies outside the namespaces selected for import.
 *
 * @param Title|Revision|WikiRevision $obj
 * @return bool True when a namespace filter is active and the object's
 *  namespace is not part of it; false otherwise, including when the object
 *  has no title at all.
 * @throws MWException When $obj is none of the supported types
 */
private function skippedNamespace( $obj ) {
	if ( $obj instanceof Title ) {
		$target = $obj;
	} elseif ( $obj instanceof Revision ) {
		$target = $obj->getTitle();
	} elseif ( $obj instanceof WikiRevision ) {
		$target = $obj->title;
	} else {
		throw new MWException( "Cannot get namespace of object in " . __METHOD__ );
	}

	// Probably a log entry
	if ( $target === null ) {
		return false;
	}

	$namespace = $target->getNamespace();
	if ( !is_array( $this->nsFilter ) ) {
		// No filter configured: nothing is skipped.
		return false;
	}

	return !in_array( $namespace, $this->nsFilter );
}
174
/**
 * Page callback registered with the importer: counts one more page.
 *
 * @param mixed $page Not used
 */
function reportPage( $page ) {
	++$this->pageCount;
}
178
/**
 * Revision callback registered with the importer. Counts the revision,
 * triggers the periodic report and, unless this is a dry run, passes the
 * revision on to the callback stored in $this->importCallback.
 *
 * Revisions without a title are reported and dropped; revisions in a
 * filtered-out namespace are dropped silently.
 *
 * @param object $rev Revision object providing getTitle()
 */
function handleRevision( $rev ) {
	$title = $rev->getTitle();
	if ( !$title ) {
		$this->progress( "Got bogus revision with null title!" );

		return;
	}
	if ( $this->skippedNamespace( $title ) ) {
		return;
	}

	$this->revCount++;
	$this->report();

	if ( $this->dryRun ) {
		return;
	}
	call_user_func( $this->importCallback, $rev );
}
201
/**
 * Upload callback registered with the importer.
 *
 * Does nothing unless upload processing is enabled. Otherwise counts the
 * upload, prints its file name and, unless this is a dry run, runs the
 * revision's importUpload() through the master database's deadlockLoop().
 *
 * @param object $revision Upload record providing getFilename() and importUpload()
 * @return mixed False when the upload was not imported, otherwise whatever
 *  deadlockLoop() returns
 */
function handleUpload( $revision ) {
	// skippedNamespace() is only consulted when uploads are enabled.
	if ( !$this->uploads || $this->skippedNamespace( $revision ) ) {
		return false;
	}

	$this->uploadCount++;
	$this->progress( "upload: " . $revision->getFilename() );

	if ( $this->dryRun ) {
		return false;
	}

	// Hack: importUpload() is called directly here; the callback kept in
	// $this->uploadCallback is not used.
	$dbw = $this->getDB( DB_MASTER );

	return $dbw->deadlockLoop( [ $revision, 'importUpload' ] );
}
226
/**
 * Log item callback registered with the importer. Counts the item in the
 * revision counter, triggers the periodic report and, unless this is a dry
 * run, passes the item on to the callback stored in $this->logItemCallback.
 *
 * @param object $rev Log item from the dump
 */
function handleLogItem( $rev ) {
	if ( $this->skippedNamespace( $rev ) ) {
		return;
	}

	$this->revCount++;
	$this->report();

	if ( $this->dryRun ) {
		return;
	}
	call_user_func( $this->logItemCallback, $rev );
}
238
/**
 * Print a progress report when exactly one of these holds: $final is
 * truthy, or the page counter is a multiple of the reporting interval.
 *
 * @param bool $final
 */
function report( $final = false ) {
	$atInterval = ( $this->pageCount % $this->reportingInterval == 0 );

	// Exclusive or: a final report that lands on an interval boundary is
	// not printed.
	if ( (bool)$final !== $atInterval ) {
		$this->showReport();
	}
}
244
/**
 * Write one progress line with the running totals and the average
 * throughput since the import started. Prints nothing in quiet mode.
 */
function showReport() {
	if ( !$this->mQuiet ) {
		// Rates stay '-' when no time has elapsed, to avoid dividing by zero.
		$rate = '-';
		$revrate = '-';
		$delta = microtime( true ) - $this->startTime;
		if ( $delta ) {
			$rate = sprintf( "%.2f", $this->pageCount / $delta );
			$revrate = sprintf( "%.2f", $this->revCount / $delta );
		}

		# Logs dumps don't have page tallies
		$line = $this->pageCount
			? "$this->pageCount ($rate pages/sec $revrate revs/sec)"
			: "$this->revCount ($revrate revs/sec)";
		$this->progress( $line );
	}
	// NOTE(review): the listing this method was taken from skips one line
	// number right here, so a statement may be missing at this point;
	// confirm against the upstream file.
}
264
/**
 * Write a message, followed by a newline, to the handle in $this->stderr.
 *
 * @param string $string
 */
function progress( $string ) {
	$line = $string . "\n";
	fwrite( $this->stderr, $line );
}
268
/**
 * Open a dump file and import it.
 *
 * Files ending in .gz, .bz2 or .7z are opened through the matching
 * decompressing stream wrapper; anything else is opened as is.
 *
 * If the file cannot be opened the script stops with an error message.
 * Previously the failed fopen() result (false) was handed to the importer
 * unchecked.
 *
 * @param string $filename Path of the dump file
 * @return mixed Result of importFromHandle()
 */
function importFromFile( $filename ) {
	if ( preg_match( '/\.gz$/', $filename ) ) {
		$filename = 'compress.zlib://' . $filename;
	} elseif ( preg_match( '/\.bz2$/', $filename ) ) {
		$filename = 'compress.bzip2://' . $filename;
	} elseif ( preg_match( '/\.7z$/', $filename ) ) {
		$filename = 'mediawiki.compress.7z://' . $filename;
	}

	$file = fopen( $filename, 'rt' );
	if ( $file === false ) {
		// fopen() only raises a warning on failure; pick up its text so the
		// user learns why the file could not be read.
		$lastError = error_get_last();
		$reason = $lastError['message'] ?? 'unknown error';
		$this->fatalError( "Could not open dump file \"$filename\": $reason" );
	}

	return $this->importFromHandle( $file );
}
282
/**
 * Import a dump read from standard input. When standard input is a
 * terminal, maybeHelp( true ) is invoked before the import is attempted.
 *
 * @return mixed Result of importFromHandle()
 */
function importFromStdin() {
	$stdin = fopen( 'php://stdin', 'rt' );

	$isTerminal = self::posix_isatty( $stdin );
	if ( $isTerminal ) {
		$this->maybeHelp( true );
	}

	return $this->importFromHandle( $stdin );
}
291
/**
 * Run the import on an already opened stream.
 *
 * Records the start time, builds a WikiImporter on top of the handle,
 * applies the command line options to it, hooks this object's handlers
 * into it and then starts the import.
 *
 * @param resource $handle Open stream containing the XML dump
 * @return mixed Result of WikiImporter::doImport(); false is returned if
 *  execution continues after an invalid --rootpage was reported
 */
function importFromHandle( $handle ) {
	// Reference point for the rates printed by showReport().
	$this->startTime = microtime( true );

	$stream = new ImportStreamSource( $handle );
	$importer = new WikiImporter( $stream, $this->getConfig() );

	// Updating statistics require a lot of time so disable it
	$importer->disableStatisticsUpdate();

	if ( $this->hasOption( 'debug' ) ) {
		$importer->setDebug( true );
	}
	if ( $this->hasOption( 'no-updates' ) ) {
		$importer->setNoUpdates( true );
	}
	if ( $this->hasOption( 'username-prefix' ) ) {
		// --no-local-users is only looked at when a prefix is given.
		$prefix = $this->getOption( 'username-prefix' );
		$assignLocalUsers = !$this->hasOption( 'no-local-users' );
		$importer->setUsernamePrefix( $prefix, $assignLocalUsers );
	}
	if ( $this->hasOption( 'rootpage' ) ) {
		$rootStatus = $importer->setTargetRootPage( $this->getOption( 'rootpage' ) );
		if ( !$rootStatus->isGood() ) {
			// Die here so that it doesn't print "Done!"
			$this->fatalError( $rootStatus->getMessage()->text() );
			return false;
		}
	}
	if ( $this->hasOption( 'skip-to' ) ) {
		$firstPage = (int)$this->getOption( 'skip-to' );
		$importer->setPageOffset( $firstPage );
		// Start the counter at the number of pages that are skipped.
		$this->pageCount = $firstPage - 1;
	}

	$importer->setPageCallback( [ $this, 'reportPage' ] );
	$importer->setNoticeCallback( function ( $msg, $params ) {
		echo wfMessage( $msg, $params )->text() . "\n";
	} );

	// Each setter's return value is kept; handleRevision() and
	// handleLogItem() call the stored value unless this is a dry run.
	$this->importCallback = $importer->setRevisionCallback( [ $this, 'handleRevision' ] );
	$this->uploadCallback = $importer->setUploadCallback( [ $this, 'handleUpload' ] );
	$this->logItemCallback = $importer->setLogItemCallback( [ $this, 'handleLogItem' ] );

	if ( $this->uploads ) {
		$importer->setImportUploads( true );
	}
	if ( $this->imageBasePath ) {
		$importer->setImageBasePath( $this->imageBasePath );
	}
	if ( $this->dryRun ) {
		$importer->setPageOutCallback( null );
	}

	return $importer->doImport();
}
349}
350
// Name the maintenance class defined in this file, then include the file
// referenced by the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = BackupReader::class;
require_once RUN_MAINTENANCE_IF_MAIN;
and that you know you can do these things To protect your we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights These restrictions translate to certain responsibilities for you if you distribute copies of the or if you modify it For if you distribute copies of such a whether gratis or for a you must give the recipients all the rights that you have You must make sure that receive or can get the source code And you must show them these terms so they know their rights We protect your rights with two and(2) offer you this license which gives you legal permission to copy
c Accompany it with the information you received as to the offer to distribute corresponding source complete source code means all the source code for all modules it plus any associated interface definition files
Definition COPYING.txt:158
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
wfReadOnly()
Check whether the wiki is in read-only mode.
Maintenance script that imports XML dump files into the current wiki.
getNsIndex( $namespace)
importFromFile( $filename)
handleRevision( $rev)
execute()
Do the actual work.
report( $final=false)
__construct()
Default constructor.
reportPage( $page)
handleUpload( $revision)
handleLogItem( $rev)
importFromHandle( $handle)
skippedNamespace( $obj)
progress( $string)
setNsfilter(array $namespaces)
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
MediaWiki exception.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasArg( $argId=0)
Does a given argument exist?
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption( $name)
Checks to see if a particular option exists.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
maybeHelp( $force=false)
Maybe show the help.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Represents a title within MediaWiki.
Definition Title.php:40
XML file reader for the page data importer.
Represents a revision, log entry or upload during the import process.
The ContentHandler facility adds support for arbitrary content types on wiki pages
Use of locking reads (e.g. the FOR UPDATE clause) is not advised. They are poorly implemented in InnoDB and will cause regular deadlock errors. It's also surprisingly easy to cripple the wiki with lock contention. Instead of locking reads
namespace being checked & $result
Definition hooks.txt:2340
namespace and then decline to actually register it & $namespaces
Definition hooks.txt:925
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition hooks.txt:1779
script(document.cookie)%253c/script%253e</pre ></div > !! end !! test XSS is escaped(inline) !!input< source lang
$maintClass
require_once RUN_MAINTENANCE_IF_MAIN
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
$source
const DB_MASTER
Definition defines.php:26
scripts txt MediaWiki primary scripts are in the root directory of the software Users should only use these scripts to access the wiki There are also some php that aren t primary scripts but helper files and won t work if they are accessed directly by the web Primary see https
Definition scripts.txt:24
$params