Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 309 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
ImportImages | |
0.00% |
0 / 306 |
|
0.00% |
0 / 6 |
4970 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 79 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 191 |
|
0.00% |
0 / 1 |
2550 | |||
findFiles | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
110 | |||
findAuxFile | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
30 | |||
getFileCommentFromSourceWiki | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
getFileUserFromSourceWiki | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * Import one or more images from the local file system into the wiki without |
4 | * using the web-based interface. |
5 | * |
6 | * "Smart import" additions: |
7 | * - aim: preserve the essential metadata (user, description) when importing media |
8 | * files from an existing wiki. |
9 | * - process: |
10 | * - interface with the source wiki, don't use bare files only (see --source-wiki-url). |
11 | * - fetch metadata from source wiki for each file to import. |
12 | * - commit the fetched metadata to the destination wiki while submitting. |
13 | * |
14 | * This program is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU General Public License as published by |
16 | * the Free Software Foundation; either version 2 of the License, or |
17 | * (at your option) any later version. |
18 | * |
19 | * This program is distributed in the hope that it will be useful, |
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22 | * GNU General Public License for more details. |
23 | * |
24 | * You should have received a copy of the GNU General Public License along |
25 | * with this program; if not, write to the Free Software Foundation, Inc., |
26 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
27 | * http://www.gnu.org/copyleft/gpl.html |
28 | * |
29 | * @file |
30 | * @ingroup Maintenance |
31 | * @author Rob Church <robchur@gmail.com> |
32 | * @author Mij <mij@bitchx.it> |
33 | */ |
34 | |
35 | require_once __DIR__ . '/Maintenance.php'; |
36 | |
37 | use MediaWiki\MainConfigNames; |
38 | use MediaWiki\Specials\SpecialUpload; |
39 | use MediaWiki\StubObject\StubGlobalUser; |
40 | use MediaWiki\Title\Title; |
41 | use MediaWiki\User\User; |
42 | |
43 | class ImportImages extends Maintenance { |
44 | |
/**
 * Register the script description, the positional 'dir' argument and every
 * command-line option this script reads in execute().
 *
 * For addOption() the third argument is "required" and the fourth is
 * "takes a value"; options registered with only a name and description are
 * plain flags.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	$this->addOption( 'extensions',
		'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
		false,
		true
	);
	$this->addOption( 'overwrite',
		'Overwrite existing images with the same name (default is to skip them)' );
	$this->addOption( 'limit',
		'Limit the number of images to process. Ignored or skipped images are not counted',
		false,
		true
	);
	$this->addOption( 'from',
		"Ignore all files until the one with the given name. Useful for resuming aborted "
		. "imports. The name should be the file's canonical database form.",
		false,
		true
	);
	$this->addOption( 'skip-dupes',
		'Skip images that were already uploaded under a different name (check SHA1)' );
	$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
	$this->addOption( 'sleep',
		'Sleep between files. Useful mostly for debugging',
		false,
		true
	);
	$this->addOption( 'user',
		"Set username of uploader, default 'Maintenance script'",
		false,
		true
	);
	// This parameter can optionally have an argument. If none specified, getOption()
	// returns 1 which is precisely what we need.
	$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
	$this->addOption( 'comment',
		"Set file description, default 'Importing file'",
		false,
		true
	);
	$this->addOption( 'comment-file',
		'Set description to the content of this file',
		false,
		true
	);
	$this->addOption( 'comment-ext',
		'Causes the description for each file to be loaded from a file with the same name, but '
		. 'the extension provided. If a global description is also given, it is appended.',
		false,
		true
	);
	$this->addOption( 'summary',
		'Upload summary, description will be used if not provided',
		false,
		true
	);
	$this->addOption( 'license',
		'Use an optional license template',
		false,
		true
	);
	$this->addOption( 'timestamp',
		'Override upload time/date, all MediaWiki timestamp formats are accepted',
		false,
		true
	);
	$this->addOption( 'protect',
		'Specify the protect value (autoconfirmed,sysop)',
		false,
		true
	);
	$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
	// The example URL is quoted on both sides; the closing quote was previously missing.
	$this->addOption( 'source-wiki-url',
		'If specified, take User and Comment data for each imported file from this URL. '
		. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
		false,
		true
	);
	$this->addOption( 'dry', "Dry run, don't import anything" );
}
130 | |
/**
 * Import every matching file found under the 'dir' argument into the local file repo.
 *
 * Outline:
 *  - validate the protect/unprotect options and build the extension whitelist;
 *  - resolve the default uploader, the default comment, summary, license and tags;
 *  - for each candidate file: build a File: title, honour --from, the optional
 *    block check, --overwrite and --skip-dupes, pick the comment (and, with
 *    --source-wiki-url, the uploader), publish and record the upload, then
 *    optionally (un)protect the page;
 *  - print the counters that are greater than zero.
 *
 * With --dry nothing is published or recorded; only the progress messages are printed.
 */
public function execute() {
	$services = $this->getServiceContainer();
	$permissionManager = $services->getPermissionManager();

	$found = 0;
	$processed = 0;
	$statistics = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	// The error case is a --protect option whose value is blank. The previous
	// condition was inverted and aborted whenever a level *was* supplied.
	if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
		: $this->getConfig()->get( MainConfigNames::FileExtensions );

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
	if ( !$files->valid() ) {
		$this->output( "No suitable files could be found for import.\n" );
		return;
	}

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}
	'@phan-var User $user';
	StubGlobalUser::setUser( $user );
	// Remembered so that a per-file uploader taken from the source wiki does not
	// leak into later files for which the source wiki reports no user.
	$defaultUser = $user;

	# Get block check. If a value is given, this specified how often the check is performed
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		// file_get_contents() signals failure with false (never null).
		if ( $comment === false ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$license = $this->getOption( 'license', '' );
	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	$tags = in_array( ChangeTags::TAG_SERVER_SIDE_UPLOAD, ChangeTags::getSoftwareTags() )
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	# Batch "upload" operation
	$restrictionStore = $services->getRestrictionStore();
	foreach ( $files as $file ) {
		$found++;
		if ( $sleep && ( $processed > 0 ) ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !$title ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		if ( $from ) {
			if ( $from !== $title->getDBkey() ) {
				$statistics['ignored']++;
				continue;
			}
			// Found the requested file, continue from here
			$from = null;
		}

		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$statistics['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $services->getRepoGroup()->getLocalRepo()
			->newFile( $title );
		if ( $image->exists() ) {
			if ( $this->hasOption( 'overwrite' ) ) {
				$this->output( "{$base} exists, overwriting..." );
				$svar = 'overwritten';
			} else {
				$this->output( "{$base} exists, skipping\n" );
				$statistics['skipped']++;
				continue;
			}
		} else {
			if ( $this->hasOption( 'skip-dupes' ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $repo->findBySha1( $sha1 );
				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$svar = 'added';
		}

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment !== false ? $real_comment : $comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( $realUser === false ) {
					# user does not exist in target wiki
					// Terminate the progress line and count the file as failed so
					// the final statistics account for it.
					$this->output(
						"failed: user '$real_user' does not exist in target wiki.\n"
					);
					$statistics['failed']++;
					continue;
				}
				$user = $realUser;
			} else {
				// No uploader known on the source wiki for this file: fall back to
				// the configured uploader, not the previous file's user.
				$user = $defaultUser;
			}
			StubGlobalUser::setUser( $user );
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $this->hasOption( 'dry' ) ) {
			$this->output(
				" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
			);
		} else {
			$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$statistics['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		if ( !$this->hasOption( 'summary' ) ) {
			$summary = $commentText;
		}

		// $archive and $props are only set on the non-dry path above; the dry
		// branch is tested first so they are never read when undefined.
		if ( $this->hasOption( 'dry' ) ) {
			$this->output( "done.\n" );
		} elseif ( $image->recordUpload3(
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
			$archive->value,
			$summary,
			$commentText,
			$user,
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
			$props,
			$timestamp,
			$tags
		)->isOK() ) {
			$this->output( "done.\n" );

			$doProtect = false;

			$protectLevel = $this->getOption( 'protect' );
			$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

			if ( $protectLevel && in_array( $protectLevel, $restrictionLevels ) ) {
				$doProtect = true;
			}
			if ( $this->hasOption( 'unprotect' ) ) {
				// An empty level removes the restriction for every applicable type below.
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				$this->output( "\nWaiting for replica DBs...\n" );
				// Wait for replica DBs.
				sleep( 2 ); # Why this sleep?
				$this->waitForReplication();

				$this->output( "\nSetting image restrictions ..." );

				$cascade = false;
				$restrictions = [];
				foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = $services->getWikiPageFactory()->newFromTitle( $title );
				$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
				$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
			}
		} else {
			$this->output( "failed. (at recordUpload stage)\n" );
			$svar = 'failed';
		}

		$statistics[$svar]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics
	$this->output( "\n" );
	foreach ( array_merge(
		[
			'Found' => $found,
			'Limit' => $limit,
		],
		$statistics
	) as $desc => $number ) {
		if ( $number > 0 ) {
			$this->output( ucfirst( $desc ) . ": $number\n" );
		}
	}
}
425 | |
/**
 * Search a directory for files with one of a set of extensions.
 *
 * Nothing is yielded when $dir is not a directory or cannot be opened.
 * The directory handle is closed once iteration finishes, and also when the
 * generator is destroyed before being fully consumed.
 *
 * @param string $dir Path to directory to search
 * @param array $exts Array of lowercase extensions to search for
 * @param bool $recurse Search subdirectories recursively
 * @return Generator<string> Generator that yields the path of each matching file
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	$dhl = is_dir( $dir ) ? opendir( $dir ) : false;
	if ( !$dhl ) {
		return;
	}

	try {
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				// The file's extension is lowercased before the (strict) lookup.
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				if ( in_array( strtolower( $ext ), $exts, true ) ) {
					yield $path;
				}
			} elseif ( $recurse && $file !== '.' && $file !== '..' && is_dir( $path ) ) {
				yield from $this->findFiles( $path, $exts, true );
			}
		}
	} finally {
		// Release the handle; it was previously left open.
		closedir( $dhl );
	}
}
451 | |
/**
 * Locate an auxiliary file that sits next to $file and carries the given
 * extension.
 *
 * The first candidate is the full base name plus the auxiliary extension.
 * While no candidate exists, the last ".something" suffix is removed from
 * the base name and the lookup is retried, at most $maxStrip times. For
 * example, for acme.foo.bar and the extension ".txt", $maxStrip = 1 (the
 * default) accepts acme.foo.bar.txt and acme.foo.txt; $maxStrip = 2 also
 * accepts acme.txt.
 *
 * @param string $file Base path
 * @param string $auxExtension The extension to be appended to the base path
 *   (a leading dot is added when missing)
 * @param int $maxStrip The maximum number of extensions to strip from the base path (default: 1)
 * @return string|false Path of the first existing candidate, or false if none exists
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = str_starts_with( $auxExtension, '.' ) ? $auxExtension : '.' . $auxExtension;
	$directory = dirname( $file );
	$stem = basename( $file );

	for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
		$candidate = $directory . '/' . $stem . $suffix;
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		// Stop when there is no dot left, or the only dot is the leading one.
		$dotAt = strrpos( $stem, '.' );
		if ( $dotAt === false || $dotAt === 0 ) {
			return false;
		}

		$stem = substr( $stem, 0, $dotAt );
	}

	return false;
}
492 | |
/**
 * Fetch the upload comment of a file from the source wiki's api.php.
 *
 * The XML response is scanned for an <ii comment="..." /> element; the
 * attribute value is returned with HTML entities decoded.
 *
 * @todo FIXME: Access the api in a better way and performing just one query
 * (preferably batching files too).
 *
 * @param string $wiki_host Base URL of the source wiki's script path; a
 *   trailing slash is tolerated
 * @param string $file File name without the "File:" prefix
 *
 * @return string|false The comment, or false when the request yields no
 *   string body or no matching element is found
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
	// rtrim() avoids "//api.php" when the URL is given with a trailing slash,
	// as in the option's own usage example.
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=comment';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );
	// A failed request does not yield a string (documented as null for
	// HttpRequestFactory::get); never hand that to preg_match().
	if ( !is_string( $body ) ) {
		return false;
	}
	// Anything other than exactly one match (including a PCRE error) is a miss.
	if ( preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
512 | |
/**
 * Fetch the name of the uploading user of a file from the source wiki's api.php.
 *
 * The XML response is scanned for an <ii user="..." /> element; the
 * attribute value is returned with HTML entities decoded.
 *
 * @param string $wiki_host Base URL of the source wiki's script path; a
 *   trailing slash is tolerated
 * @param string $file File name without the "File:" prefix
 *
 * @return string|false The user name, or false when the request yields no
 *   string body or no matching element is found
 */
private function getFileUserFromSourceWiki( $wiki_host, $file ) {
	// rtrim() avoids "//api.php" when the URL is given with a trailing slash,
	// as in the option's own usage example.
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=user';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );
	// A failed request does not yield a string (documented as null for
	// HttpRequestFactory::get); never hand that to preg_match().
	if ( !is_string( $body ) ) {
		return false;
	}
	// Anything other than exactly one match (including a PCRE error) is a miss.
	if ( preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
523 | |
524 | } |
525 | |
// Names this script's class in $maintClass, then includes the file referenced by
// RUN_MAINTENANCE_IF_MAIN. NOTE(review): that constant is not defined in this file —
// presumably Maintenance.php defines it and the included file runs $maintClass; confirm.
526 | $maintClass = ImportImages::class; |
527 | require_once RUN_MAINTENANCE_IF_MAIN; |