Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 168 |
|
0.00% |
0 / 15 |
CRAP | |
0.00% |
0 / 1 |
BackupReader | |
0.00% |
0 / 168 |
|
0.00% |
0 / 15 |
2862 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
12 | |||
execute | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
42 | |||
setNsfilter | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getNsIndex | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
skippedNamespace | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
reportPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
handleRevision | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
handleUpload | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
handleLogItem | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
report | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
showReport | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
20 | |||
progress | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
importFromFile | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
importFromStdin | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
importFromHandle | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | /** |
3 | * Import XML dump files into the current wiki. |
4 | * |
5 | * Copyright © 2005 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | * @ingroup Maintenance |
25 | */ |
26 | |
27 | use MediaWiki\Linker\LinkTarget; |
28 | use MediaWiki\Maintenance\Maintenance; |
29 | use MediaWiki\Permissions\UltimateAuthority; |
30 | use MediaWiki\User\User; |
31 | |
32 | // @codeCoverageIgnoreStart |
33 | require_once __DIR__ . '/Maintenance.php'; |
34 | // @codeCoverageIgnoreEnd |
35 | |
36 | /** |
37 | * Maintenance script that imports XML dump files into the current wiki. |
38 | * |
39 | * @ingroup Maintenance |
40 | */ |
41 | class BackupReader extends Maintenance { |
42 | /** @var int */ |
43 | public $reportingInterval = 100; |
44 | /** @var int */ |
45 | public $pageCount = 0; |
46 | /** @var int */ |
47 | public $revCount = 0; |
48 | /** @var bool */ |
49 | public $dryRun = false; |
50 | /** @var bool */ |
51 | public $uploads = false; |
52 | /** @var int */ |
53 | protected $uploadCount = 0; |
54 | /** @var string|false */ |
55 | public $imageBasePath = false; |
56 | /** @var array|false */ |
57 | public $nsFilter = false; |
58 | /** @var resource|false */ |
59 | public $stderr; |
60 | /** @var callable|null */ |
61 | protected $importCallback; |
62 | /** @var callable|null */ |
63 | protected $logItemCallback; |
64 | /** @var callable|null */ |
65 | protected $uploadCallback; |
66 | /** @var float */ |
67 | protected $startTime; |
68 | |
/**
 * Declare the script description, its command-line options and the
 * optional dump file argument, and open the stream used for progress
 * messages.
 */
public function __construct() {
	parent::__construct();

	// Fetch the registered stream wrappers once; both compression checks use the same list.
	$wrappers = stream_get_wrappers();

	$gz = '(disabled; requires PHP zlib module)';
	if ( in_array( 'compress.zlib', $wrappers ) ) {
		$gz = 'ok';
	}

	$bz2 = '(disabled; requires PHP bzip2 module)';
	if ( in_array( 'compress.bzip2', $wrappers ) ) {
		$bz2 = 'ok';
	}

	$description = <<<TEXT
This script reads pages from an XML file as produced from Special:Export or
dumpBackup.php, and saves them into the current wiki.

Compressed XML files may be read directly:
.gz $gz
.bz2 $bz2
.7z (if 7za executable is in PATH)

Note that for very large data sets, importDump.php may be slow; there are
alternate methods which can be much faster for full site restoration:
<https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
TEXT;
	$this->addDescription( $description );

	// Progress lines are written here by progress().
	$this->stderr = fopen( "php://stderr", "wt" );

	// Options that take a value.
	$this->addOption(
		'report',
		'Report position and speed after every n pages processed',
		false,
		true
	);
	$this->addOption(
		'namespaces',
		'Import only the pages from namespaces belonging to the list of ' .
			'pipe-separated namespace names or namespace indexes',
		false,
		true
	);
	$this->addOption(
		'rootpage',
		'Pages will be imported as subpages of the specified page',
		false,
		true
	);

	// Plain switches.
	$this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
	$this->addOption( 'debug', 'Output extra verbose debug information' );
	$this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
	$this->addOption(
		'no-updates',
		'Disable link table updates. Is faster but leaves the wiki in an inconsistent state'
	);

	$this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
	$this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true );
	$this->addOption(
		'username-prefix',
		'Prefix for interwiki usernames; a trailing ">" will be added. Default: "imported>"',
		false,
		true
	);
	$this->addOption(
		'no-local-users',
		'Treat all usernames as interwiki. ' .
			'The default is to assign edits to local users where they exist.',
		false,
		false
	);

	$this->addArg( 'file', 'Dump file to import [else use stdin]', false );
}
120 | |
/**
 * Script entry point: refuse to run on a read-only wiki, copy the
 * command-line options into the object's state, then import from the file
 * argument or, when none was given, from standard input.
 */
public function execute() {
	$readOnlyMode = $this->getServiceContainer()->getReadOnlyMode();
	if ( $readOnlyMode->isReadOnly() ) {
		$this->fatalError( "Wiki is in read-only mode; you'll need to disable it for import to work." );
	}

	// report() takes the page count modulo this interval, so a value of
	// zero must be replaced to avoid a division by zero.
	$interval = intval( $this->getOption( 'report', 100 ) );
	$this->reportingInterval = $interval ?: 100;

	$this->dryRun = $this->hasOption( 'dry-run' );
	$this->uploads = $this->hasOption( 'uploads' );

	if ( $this->hasOption( 'image-base-path' ) ) {
		$this->imageBasePath = $this->getOption( 'image-base-path' );
	}

	if ( $this->hasOption( 'namespaces' ) ) {
		// The option is a pipe-separated list of namespace names and/or indexes.
		$requested = explode( '|', $this->getOption( 'namespaces' ) );
		$this->setNsfilter( $requested );
	}

	if ( !$this->hasArg( 0 ) ) {
		$this->importFromStdin();
	} else {
		$this->importFromFile( $this->getArg( 0 ) );
	}

	$this->output( "Done!\n" );
	$this->output( "You might want to run rebuildrecentchanges.php to regenerate RecentChanges,\n" );
	$this->output( "and initSiteStats.php to update page and revision counts\n" );
}
152 | |
/**
 * Set the namespace filter from a list of namespace names or indexes.
 *
 * An empty list switches filtering off. Otherwise every entry is resolved
 * through getNsIndex() and duplicates are removed.
 *
 * @param array $namespaces Namespace names and/or indexes
 */
private function setNsfilter( array $namespaces ) {
	if ( $namespaces === [] ) {
		$this->nsFilter = false;

		return;
	}

	$indexes = [];
	foreach ( $namespaces as $key => $namespace ) {
		$indexes[$key] = $this->getNsIndex( $namespace );
	}

	$this->nsFilter = array_unique( $indexes );
}
161 | |
/**
 * Resolve one namespace given on the command line to a namespace index.
 *
 * The value is first looked up as a namespace name in the content language.
 * If that fails it is accepted as an index when it is a plain integer
 * string for which the content language has a namespace text. Anything
 * else is reported through fatalError().
 *
 * @param string $namespace Namespace name or index
 * @return mixed The content language's lookup result, or the integer index
 */
private function getNsIndex( $namespace ) {
	$lang = $this->getServiceContainer()->getContentLanguage();

	$byName = $lang->getNsIndex( $namespace );
	if ( $byName !== false ) {
		return $byName;
	}

	// Only a string that round-trips through intval() counts as an index,
	// so inputs such as "12abc" are rejected.
	$byNumber = intval( $namespace );
	$isPlainInteger = strval( $byNumber ) === $namespace;
	if ( $isPlainInteger && $lang->getNsText( $byNumber ) !== false ) {
		return $byNumber;
	}

	$this->fatalError( "Unknown namespace text / index specified: $namespace" );
}
174 | |
/**
 * Tell whether an item must be skipped because of the namespace filter.
 *
 * @param LinkTarget|null $title Title of the item; null is never skipped
 * @return bool True only when a filter is set and the title's namespace
 *  is not part of it
 */
private function skippedNamespace( $title ) {
	// No title at all: probably a log entry, which is not filtered.
	if ( $title === null ) {
		return false;
	}

	$namespace = $title->getNamespace();

	// nsFilter stays false while no filter has been set.
	if ( !is_array( $this->nsFilter ) ) {
		return false;
	}

	return !in_array( $namespace, $this->nsFilter );
}
189 | |
/**
 * Page callback registered with the importer: count one more page.
 *
 * @param mixed $page Not used by this method
 */
public function reportPage( $page ) {
	$this->pageCount += 1;
}
193 | |
/**
 * Revision callback registered with the importer.
 *
 * Revisions without a title are reported and dropped, and revisions in a
 * filtered-out namespace are dropped silently. Every other revision is
 * counted, may trigger a progress report, and is then passed to the
 * callback that was previously registered on the importer, unless this
 * is a dry run.
 *
 * @param WikiRevision $rev
 */
public function handleRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();
	if ( !$title ) {
		$this->progress( "Got bogus revision with null title!" );

		return;
	}

	if ( $this->skippedNamespace( $title ) ) {
		return;
	}

	++$this->revCount;
	$this->report();

	if ( $this->dryRun ) {
		return;
	}

	$callback = $this->importCallback;
	$callback( $rev );
}
216 | |
/**
 * Upload callback registered with the importer.
 *
 * Does nothing unless upload processing was requested. Uploads in a
 * filtered-out namespace are skipped. Every other upload is counted and
 * announced on the progress stream, and outside of a dry run it is
 * imported through the upload importer service.
 *
 * @param WikiRevision $revision
 * @return bool True only when the upload was imported and its status is
 *  good; false in every other case
 */
public function handleUpload( WikiRevision $revision ) {
	if ( !$this->uploads ) {
		return false;
	}

	if ( $this->skippedNamespace( $revision->getTitle() ) ) {
		return false;
	}

	++$this->uploadCount;
	$this->progress( "upload: " . $revision->getFilename() );

	if ( $this->dryRun ) {
		return false;
	}

	// The original code flags this as a hack: the upload importer service
	// is called directly and the callback stored in $this->uploadCallback
	// is not used.
	$uploadImporter = $this->getServiceContainer()->getWikiRevisionUploadImporter();

	return $uploadImporter->import( $revision )->isGood();
}
242 | |
/**
 * Log item callback registered with the importer.
 *
 * Items in a filtered-out namespace are dropped. Every other item is
 * counted as a revision, may trigger a progress report, and is then passed
 * to the callback that was previously registered on the importer, unless
 * this is a dry run.
 *
 * @param WikiRevision $rev
 */
public function handleLogItem( WikiRevision $rev ) {
	if ( $this->skippedNamespace( $rev->getTitle() ) ) {
		return;
	}

	++$this->revCount;
	$this->report();

	if ( $this->dryRun ) {
		return;
	}

	$callback = $this->logItemCallback;
	$callback( $rev );
}
257 | |
/**
 * Show a progress report when exactly one of two conditions holds: $final
 * is truthy, or the page count is a multiple of the reporting interval.
 *
 * @param bool $final
 */
private function report( $final = false ) {
	$onInterval = ( $this->pageCount % $this->reportingInterval ) == 0;

	// Exclusive or: the two flags must differ.
	if ( (bool)$final !== $onInterval ) {
		$this->showReport();
	}
}
263 | |
/**
 * Print the current position and throughput to the progress stream unless
 * quiet mode is on, then wait for replication.
 */
private function showReport() {
	if ( !$this->mQuiet ) {
		$elapsed = microtime( true ) - $this->startTime;

		// With no measurable elapsed time a rate cannot be computed.
		$rate = '-';
		$revrate = '-';
		if ( $elapsed ) {
			$rate = sprintf( "%.2f", $this->pageCount / $elapsed );
			$revrate = sprintf( "%.2f", $this->revCount / $elapsed );
		}

		// Log dumps don't have page tallies, so fall back to the revision count.
		$message = $this->pageCount
			? "$this->pageCount ($rate pages/sec $revrate revs/sec)"
			: "$this->revCount ($revrate revs/sec)";
		$this->progress( $message );
	}

	$this->waitForReplication();
}
283 | |
/**
 * Write one line of progress text to the stream held in $this->stderr.
 *
 * @param string $string Text to write; a newline is appended
 */
private function progress( $string ) {
	$line = $string . "\n";
	fwrite( $this->stderr, $line );
}
287 | |
/**
 * Open a dump file and import its contents.
 *
 * File names ending in .gz, .bz2 or .7z are opened through the matching
 * decompression stream wrapper. A file that cannot be opened is reported
 * through fatalError(). The handle is closed again once the import has
 * finished or thrown.
 *
 * @param string $filename Path of the dump file
 * @return mixed Whatever importFromHandle() returns
 */
private function importFromFile( $filename ) {
	if ( preg_match( '/\.gz$/', $filename ) ) {
		$filename = 'compress.zlib://' . $filename;
	} elseif ( preg_match( '/\.bz2$/', $filename ) ) {
		$filename = 'compress.bzip2://' . $filename;
	} elseif ( preg_match( '/\.7z$/', $filename ) ) {
		$filename = 'mediawiki.compress.7z://' . $filename;
	}

	$file = fopen( $filename, 'rt' );
	if ( $file === false ) {
		$this->fatalError( error_get_last()['message'] ?? 'Could not open file' );
	}

	try {
		return $this->importFromHandle( $file );
	} finally {
		// This method opened the handle, so it releases it as well. The
		// is_resource() guard avoids closing a stream that was already
		// closed during the import.
		if ( is_resource( $file ) ) {
			fclose( $file );
		}
	}
}
304 | |
/**
 * Import a dump read from standard input.
 *
 * When standard input cannot be opened the error is reported through
 * fatalError(), as importFromFile() does for files. When standard input is
 * a terminal rather than a pipe or redirect, maybeHelp( true ) is called
 * first (presumably to print the usage text; confirm against Maintenance).
 *
 * @return mixed Whatever importFromHandle() returns
 */
private function importFromStdin() {
	$file = fopen( 'php://stdin', 'rt' );
	if ( $file === false ) {
		// Without this check a false handle would be handed to posix_isatty().
		$this->fatalError( error_get_last()['message'] ?? 'Could not open standard input' );
	}

	if ( self::posix_isatty( $file ) ) {
		$this->maybeHelp( true );
	}

	return $this->importFromHandle( $file );
}
313 | |
/**
 * Build a WikiImporter for the given stream, configure it from the
 * command-line options, hook this script's callbacks into it and run the
 * import.
 *
 * @param resource $handle Open stream to read the XML dump from
 * @return mixed Result of the importer's doImport() call
 */
private function importFromHandle( $handle ) {
	// Reference point for the rates printed by showReport().
	$this->startTime = microtime( true );

	$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );

	$source = new ImportStreamSource( $handle );
	$importer = $this->getServiceContainer()
		->getWikiImporterFactory()
		->getWikiImporter( $source, new UltimateAuthority( $user ) );

	// Updating statistics requires a lot of time, so disable it
	$importer->disableStatisticsUpdate();

	if ( $this->hasOption( 'debug' ) ) {
		$importer->setDebug( true );
	}
	if ( $this->hasOption( 'no-updates' ) ) {
		$importer->setNoUpdates( true );
	}
	// The second argument is false when --no-local-users was given.
	$importer->setUsernamePrefix(
		$this->getOption( 'username-prefix', 'imported' ),
		!$this->hasOption( 'no-local-users' )
	);
	if ( $this->hasOption( 'rootpage' ) ) {
		$statusRootPage = $importer->setTargetRootPage( $this->getOption( 'rootpage' ) );
		if ( !$statusRootPage->isGood() ) {
			// Die here so that it doesn't print "Done!"
			$this->fatalError( $statusRootPage );
		}
	}
	if ( $this->hasOption( 'skip-to' ) ) {
		$nthPage = (int)$this->getOption( 'skip-to' );
		$importer->setPageOffset( $nthPage );
		// Starting at page n means n-1 pages count as already processed.
		// Clamp at zero so that a zero, negative or non-numeric value
		// cannot leave the page tally negative, which would distort the
		// counts and rates printed by showReport().
		$this->pageCount = max( 0, $nthPage - 1 );
	}

	$importer->setPageCallback( [ $this, 'reportPage' ] );
	$importer->setNoticeCallback( static function ( $msg, $params ) {
		echo wfMessage( $msg, $params )->text() . "\n";
	} );

	// Each setter's return value is stored; handleRevision() and
	// handleLogItem() later invoke the stored callbacks to do the actual
	// import work.
	$this->importCallback = $importer->setRevisionCallback(
		[ $this, 'handleRevision' ] );
	$this->uploadCallback = $importer->setUploadCallback(
		[ $this, 'handleUpload' ] );
	$this->logItemCallback = $importer->setLogItemCallback(
		[ $this, 'handleLogItem' ] );

	if ( $this->uploads ) {
		$importer->setImportUploads( true );
	}
	if ( $this->imageBasePath ) {
		$importer->setImageBasePath( $this->imageBasePath );
	}

	if ( $this->dryRun ) {
		// A dry run must not run the importer's page-out step.
		$importer->setPageOutCallback( null );
	}

	return $importer->doImport();
}
372 | } |
373 | |
374 | // @codeCoverageIgnoreStart |
375 | $maintClass = BackupReader::class; |
376 | require_once RUN_MAINTENANCE_IF_MAIN; |
377 | // @codeCoverageIgnoreEnd |