Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 168 |
|
0.00% |
0 / 15 |
CRAP | |
0.00% |
0 / 1 |
BackupReader | |
0.00% |
0 / 168 |
|
0.00% |
0 / 15 |
2862 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
12 | |||
execute | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
42 | |||
setNsfilter | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getNsIndex | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
skippedNamespace | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
reportPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
handleRevision | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
handleUpload | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
handleLogItem | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
report | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
showReport | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
20 | |||
progress | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
importFromFile | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
importFromStdin | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
importFromHandle | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | /** |
3 | * Import XML dump files into the current wiki. |
4 | * |
5 | * Copyright © 2005 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | * @ingroup Maintenance |
25 | */ |
26 | |
27 | use MediaWiki\Linker\LinkTarget; |
28 | use MediaWiki\Maintenance\Maintenance; |
29 | use MediaWiki\Permissions\UltimateAuthority; |
30 | use MediaWiki\User\User; |
31 | |
32 | // @codeCoverageIgnoreStart |
33 | require_once __DIR__ . '/Maintenance.php'; |
34 | // @codeCoverageIgnoreEnd |
35 | |
/**
 * Maintenance script that imports XML dump files into the current wiki.
 *
 * The dump is read either from the file given as the first argument (optionally
 * .gz, .bz2 or .7z compressed) or from stdin, and is handed to a WikiImporter.
 * Progress lines are written to stderr.
 *
 * @ingroup Maintenance
 */
class BackupReader extends Maintenance {
	/** @var int Progress is reported when the page count is a multiple of this value */
	public $reportingInterval = 100;
	/** @var int Number of pages seen so far (incremented by reportPage()) */
	public $pageCount = 0;
	/** @var int Number of revisions and log items handled so far */
	public $revCount = 0;
	/** @var bool True when --dry-run was given: parse the dump but import nothing */
	public $dryRun = false;
	/** @var bool True when --uploads was given: process upload data found in the dump */
	public $uploads = false;
	/** @var int Number of uploads handled so far */
	protected $uploadCount = 0;
	/** @var string|false Value of --image-base-path, or false when not given */
	public $imageBasePath = false;
	/** @var array|false Namespace indexes to import, or false to import every namespace */
	public $nsFilter = false;
	/** @var resource|false Handle opened on php://stderr, used by progress() */
	public $stderr;
	/** @var callable|null Value returned by the importer's setRevisionCallback() */
	protected $importCallback;
	/** @var callable|null Value returned by the importer's setLogItemCallback() */
	protected $logItemCallback;
	/** @var callable|null Value returned by the importer's setUploadCallback() */
	protected $uploadCallback;
	/** @var float microtime( true ) taken when the import started */
	protected $startTime;

	/**
	 * Declare the script description, its options and its single optional argument.
	 */
	public function __construct() {
		parent::__construct();

		// Fetch the wrapper list once; both compression checks need it.
		$wrappers = stream_get_wrappers();
		$gz = in_array( 'compress.zlib', $wrappers )
			? 'ok'
			: '(disabled; requires PHP zlib module)';
		$bz2 = in_array( 'compress.bzip2', $wrappers )
			? 'ok'
			: '(disabled; requires PHP bzip2 module)';

		$this->addDescription(
			<<<TEXT
This script reads pages from an XML file as produced from Special:Export or
dumpBackup.php, and saves them into the current wiki.

Compressed XML files may be read directly:
  .gz $gz
  .bz2 $bz2
  .7z (if 7za executable is in PATH)

Note that for very large data sets, importDump.php may be slow; there are
alternate methods which can be much faster for full site restoration:
<https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
TEXT
		);
		$this->stderr = fopen( "php://stderr", "wt" );
		$this->addOption( 'report',
			'Report position and speed after every n pages processed', false, true );
		$this->addOption( 'namespaces',
			'Import only the pages from namespaces belonging to the list of ' .
			'pipe-separated namespace names or namespace indexes', false, true );
		$this->addOption( 'rootpage', 'Pages will be imported as subpages of the specified page',
			false, true );
		$this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
		$this->addOption( 'debug', 'Output extra verbose debug information' );
		$this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
		$this->addOption(
			'no-updates',
			'Disable link table updates. Is faster but leaves the wiki in an inconsistent state'
		);
		$this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
		$this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true );
		$this->addOption( 'username-prefix',
			'Prefix for interwiki usernames; a trailing ">" will be added. Default: "imported>"',
			false, true );
		$this->addOption( 'no-local-users',
			'Treat all usernames as interwiki. ' .
			'The default is to assign edits to local users where they exist.',
			false, false
		);
		$this->addArg( 'file', 'Dump file to import [else use stdin]', false );
	}

	/**
	 * Read the command line options into the instance state and run the import
	 * from the given file, or from stdin when no file argument was passed.
	 */
	public function execute() {
		if ( $this->getServiceContainer()->getReadOnlyMode()->isReadOnly() ) {
			$this->fatalError( "Wiki is in read-only mode; you'll need to disable it for import to work." );
		}

		$this->reportingInterval = intval( $this->getOption( 'report', 100 ) );
		if ( !$this->reportingInterval ) {
			// avoid division by zero in report()
			$this->reportingInterval = 100;
		}

		$this->dryRun = $this->hasOption( 'dry-run' );
		$this->uploads = $this->hasOption( 'uploads' );

		if ( $this->hasOption( 'image-base-path' ) ) {
			$this->imageBasePath = $this->getOption( 'image-base-path' );
		}
		if ( $this->hasOption( 'namespaces' ) ) {
			$this->setNsfilter( explode( '|', $this->getOption( 'namespaces' ) ) );
		}

		if ( $this->hasArg( 0 ) ) {
			$this->importFromFile( $this->getArg( 0 ) );
		} else {
			$this->importFromStdin();
		}

		$this->output( "Done!\n" );
		$this->output( "You might want to run rebuildrecentchanges.php to regenerate RecentChanges,\n" );
		$this->output( "and initSiteStats.php to update page and revision counts\n" );
	}

	/**
	 * Set the namespace filter from a list of namespace names and/or indexes.
	 *
	 * @param array $namespaces Namespace names or numeric indexes (as strings);
	 *  an empty list disables filtering
	 */
	private function setNsfilter( array $namespaces ) {
		if ( count( $namespaces ) == 0 ) {
			$this->nsFilter = false;

			return;
		}
		$this->nsFilter = array_unique( array_map( [ $this, 'getNsIndex' ], $namespaces ) );
	}

	/**
	 * Resolve one --namespaces entry to a namespace index.
	 *
	 * The content language is asked to resolve the value as a namespace name first;
	 * failing that, the value is accepted when it is the canonical decimal form of
	 * an integer for which the content language knows a namespace text. Anything
	 * else is reported through fatalError().
	 *
	 * @param string $namespace Namespace name or index
	 * @return int
	 */
	private function getNsIndex( $namespace ) {
		$contLang = $this->getServiceContainer()->getContentLanguage();
		$result = $contLang->getNsIndex( $namespace );
		if ( $result !== false ) {
			return $result;
		}
		$ns = intval( $namespace );
		// The strval() round trip rejects inputs such as "1abc" or "01".
		if ( strval( $ns ) === $namespace && $contLang->getNsText( $ns ) !== false ) {
			return $ns;
		}
		$this->fatalError( "Unknown namespace text / index specified: $namespace" );
	}

	/**
	 * Tell whether a title is excluded by the namespace filter.
	 *
	 * @param LinkTarget|null $title
	 * @return bool True when a filter is set and the title's namespace is not in it
	 */
	private function skippedNamespace( $title ) {
		if ( $title === null ) {
			// Probably a log entry
			return false;
		}

		$ns = $title->getNamespace();

		return is_array( $this->nsFilter ) && !in_array( $ns, $this->nsFilter );
	}

	/**
	 * Page callback registered with the importer; only counts the page.
	 *
	 * @param mixed $page Unused
	 */
	public function reportPage( $page ) {
		$this->pageCount++;
	}

	/**
	 * Revision callback registered with the importer.
	 *
	 * Counts the revision, emits a progress report when due and, unless this is a
	 * dry run, passes the revision on to the callback stored in $importCallback.
	 * Revisions without a title or in a filtered-out namespace are ignored.
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( WikiRevision $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->progress( "Got bogus revision with null title!" );

			return;
		}

		if ( $this->skippedNamespace( $title ) ) {
			return;
		}

		$this->revCount++;
		$this->report();

		if ( !$this->dryRun ) {
			( $this->importCallback )( $rev );
		}
	}

	/**
	 * Upload callback registered with the importer.
	 *
	 * @param WikiRevision $revision
	 * @return bool Whether the upload importer reported a good status; false when
	 *  uploads are disabled, the namespace is filtered out, or this is a dry run
	 */
	public function handleUpload( WikiRevision $revision ) {
		if ( $this->uploads ) {
			if ( $this->skippedNamespace( $revision->getTitle() ) ) {
				return false;
			}
			$this->uploadCount++;
			$this->progress( "upload: " . $revision->getFilename() );

			if ( !$this->dryRun ) {
				// The callback kept in $uploadCallback is deliberately not used here;
				// the upload importer service is invoked directly instead.
				$importer = $this->getServiceContainer()->getWikiRevisionUploadImporter();
				$statusValue = $importer->import( $revision );

				return $statusValue->isGood();
			}
		}

		return false;
	}

	/**
	 * Log item callback registered with the importer.
	 *
	 * Counts the item, emits a progress report when due and, unless this is a dry
	 * run, passes the item on to the callback stored in $logItemCallback.
	 *
	 * @param WikiRevision $rev
	 */
	public function handleLogItem( WikiRevision $rev ) {
		if ( $this->skippedNamespace( $rev->getTitle() ) ) {
			return;
		}
		$this->revCount++;
		$this->report();

		if ( !$this->dryRun ) {
			( $this->logItemCallback )( $rev );
		}
	}

	/**
	 * Show a progress report when exactly one of these holds: $final is true, or
	 * the page count is a multiple of the reporting interval.
	 *
	 * @param bool $final
	 */
	private function report( $final = false ) {
		if ( $final xor ( $this->pageCount % $this->reportingInterval == 0 ) ) {
			$this->showReport();
		}
	}

	/**
	 * Print the current counts and rates (unless quiet), then wait for replication.
	 */
	private function showReport() {
		if ( !$this->mQuiet ) {
			$delta = microtime( true ) - $this->startTime;
			if ( $delta ) {
				$rate = sprintf( "%.2f", $this->pageCount / $delta );
				$revrate = sprintf( "%.2f", $this->revCount / $delta );
			} else {
				$rate = '-';
				$revrate = '-';
			}
			# Logs dumps don't have page tallies
			if ( $this->pageCount ) {
				$this->progress( "$this->pageCount ($rate pages/sec $revrate revs/sec)" );
			} else {
				$this->progress( "$this->revCount ($revrate revs/sec)" );
			}
		}
		$this->waitForReplication();
	}

	/**
	 * Write one line to the stderr handle opened in the constructor.
	 *
	 * @param string $string
	 */
	private function progress( $string ) {
		fwrite( $this->stderr, $string . "\n" );
	}

	/**
	 * Open a dump file, choosing a decompressing stream wrapper from the file
	 * extension, import it, and close the stream again.
	 *
	 * @param string $filename
	 * @return mixed Whatever importFromHandle() returns
	 */
	private function importFromFile( $filename ) {
		if ( preg_match( '/\.gz$/', $filename ) ) {
			$filename = 'compress.zlib://' . $filename;
		} elseif ( preg_match( '/\.bz2$/', $filename ) ) {
			$filename = 'compress.bzip2://' . $filename;
		} elseif ( preg_match( '/\.7z$/', $filename ) ) {
			$filename = 'mediawiki.compress.7z://' . $filename;
		}

		$file = fopen( $filename, 'rt' );
		if ( $file === false ) {
			$this->fatalError( error_get_last()['message'] ?? 'Could not open file' );
		}

		try {
			return $this->importFromHandle( $file );
		} finally {
			// Release the stream whether the import returned or threw. The guard
			// covers the case where something downstream already closed it.
			if ( is_resource( $file ) ) {
				fclose( $file );
			}
		}
	}

	/**
	 * Import a dump from stdin; when stdin is a terminal, show the help instead.
	 *
	 * @return mixed Whatever importFromHandle() returns
	 */
	private function importFromStdin() {
		$file = fopen( 'php://stdin', 'rt' );
		if ( self::posix_isatty( $file ) ) {
			$this->maybeHelp( true );
		}

		return $this->importFromHandle( $file );
	}

	/**
	 * Configure a WikiImporter from the command line options and run it on an
	 * already opened stream.
	 *
	 * @param resource $handle Stream to read the XML dump from
	 * @return mixed The result of the importer's doImport()
	 */
	private function importFromHandle( $handle ) {
		$this->startTime = microtime( true );

		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );

		$source = new ImportStreamSource( $handle );
		$importer = $this->getServiceContainer()
			->getWikiImporterFactory()
			->getWikiImporter( $source, new UltimateAuthority( $user ) );

		// Updating statistics require a lot of time so disable it
		$importer->disableStatisticsUpdate();

		if ( $this->hasOption( 'debug' ) ) {
			$importer->setDebug( true );
		}
		if ( $this->hasOption( 'no-updates' ) ) {
			$importer->setNoUpdates( true );
		}
		$importer->setUsernamePrefix(
			$this->getOption( 'username-prefix', 'imported' ),
			!$this->hasOption( 'no-local-users' )
		);
		if ( $this->hasOption( 'rootpage' ) ) {
			$statusRootPage = $importer->setTargetRootPage( $this->getOption( 'rootpage' ) );
			if ( !$statusRootPage->isGood() ) {
				// Die here so that it doesn't print "Done!"
				$this->fatalError( $statusRootPage );
			}
		}
		if ( $this->hasOption( 'skip-to' ) ) {
			$nthPage = (int)$this->getOption( 'skip-to' );
			$importer->setPageOffset( $nthPage );
			// Start the tally at the number of skipped pages. A --skip-to of 0 or
			// below skips nothing, so never let the counter go negative.
			$this->pageCount = max( 0, $nthPage - 1 );
		}
		$importer->setPageCallback( [ $this, 'reportPage' ] );
		$importer->setNoticeCallback( static function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );
		// Each setter's return value is kept so that the handle*() wrappers can
		// forward to it after doing their own filtering and counting.
		$this->importCallback = $importer->setRevisionCallback(
			[ $this, 'handleRevision' ] );
		$this->uploadCallback = $importer->setUploadCallback(
			[ $this, 'handleUpload' ] );
		$this->logItemCallback = $importer->setLogItemCallback(
			[ $this, 'handleLogItem' ] );
		if ( $this->uploads ) {
			$importer->setImportUploads( true );
		}
		if ( $this->imageBasePath ) {
			$importer->setImageBasePath( $this->imageBasePath );
		}

		if ( $this->dryRun ) {
			$importer->setPageOutCallback( null );
		}

		return $importer->doImport();
	}
}
367 | |
368 | // @codeCoverageIgnoreStart |
// Script entry point: name the Maintenance subclass defined in this file, then
// include the file referenced by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): that constant is defined outside this file; presumably the
// included runner executes $maintClass only when this script is invoked
// directly - confirm.
369 | $maintClass = BackupReader::class; |
370 | require_once RUN_MAINTENANCE_IF_MAIN; |
371 | // @codeCoverageIgnoreEnd |