Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 309 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
ImportImages | |
0.00% |
0 / 309 |
|
0.00% |
0 / 6 |
4970 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 79 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 194 |
|
0.00% |
0 / 1 |
2550 | |||
findFiles | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
110 | |||
findAuxFile | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
30 | |||
getFileCommentFromSourceWiki | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
getFileUserFromSourceWiki | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * Import one or more images from the local file system into the wiki without |
4 | * using the web-based interface. |
5 | * |
6 | * "Smart import" additions: |
7 | * - aim: preserve the essential metadata (user, description) when importing media |
8 | * files from an existing wiki. |
9 | * - process: |
10 | * - interface with the source wiki, don't use bare files only (see --source-wiki-url). |
11 | * - fetch metadata from source wiki for each file to import. |
12 | * - commit the fetched metadata to the destination wiki while submitting. |
13 | * |
14 | * This program is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU General Public License as published by |
16 | * the Free Software Foundation; either version 2 of the License, or |
17 | * (at your option) any later version. |
18 | * |
19 | * This program is distributed in the hope that it will be useful, |
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22 | * GNU General Public License for more details. |
23 | * |
24 | * You should have received a copy of the GNU General Public License along |
25 | * with this program; if not, write to the Free Software Foundation, Inc., |
26 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
27 | * http://www.gnu.org/copyleft/gpl.html |
28 | * |
29 | * @file |
30 | * @ingroup Maintenance |
31 | * @author Rob Church <robchur@gmail.com> |
32 | * @author Mij <mij@bitchx.it> |
33 | */ |
34 | |
35 | // @codeCoverageIgnoreStart |
36 | require_once __DIR__ . '/Maintenance.php'; |
37 | // @codeCoverageIgnoreEnd |
38 | |
39 | use MediaWiki\MainConfigNames; |
40 | use MediaWiki\Maintenance\Maintenance; |
41 | use MediaWiki\Specials\SpecialUpload; |
42 | use MediaWiki\StubObject\StubGlobalUser; |
43 | use MediaWiki\Title\Title; |
44 | use MediaWiki\User\User; |
45 | use Wikimedia\FileBackend\FSFile\FSFile; |
46 | |
47 | class ImportImages extends Maintenance { |
48 | |
/**
 * Register the script description, the positional "dir" argument and every
 * command-line option that execute() reads.
 *
 * For addOption() calls with four arguments, the third (false) marks the
 * option as not required and the fourth (true) marks it as taking a value.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	$this->addOption( 'extensions',
		'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
		false,
		true
	);
	$this->addOption( 'overwrite',
		'Overwrite existing images with the same name (default is to skip them)' );
	$this->addOption( 'limit',
		'Limit the number of images to process. Ignored or skipped images are not counted',
		false,
		true
	);
	$this->addOption( 'from',
		"Ignore all files until the one with the given name. Useful for resuming aborted "
		. "imports. The name should be the file's canonical database form.",
		false,
		true
	);
	$this->addOption( 'skip-dupes',
		'Skip images that were already uploaded under a different name (check SHA1)' );
	$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
	$this->addOption( 'sleep',
		'Sleep between files. Useful mostly for debugging',
		false,
		true
	);
	$this->addOption( 'user',
		"Set username of uploader, default 'Maintenance script'",
		false,
		true
	);
	// This parameter can optionally have an argument. If none specified, getOption()
	// returns 1 which is precisely what we need.
	$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
	$this->addOption( 'comment',
		"Set file description, default 'Importing file'",
		false,
		true
	);
	$this->addOption( 'comment-file',
		'Set description to the content of this file',
		false,
		true
	);
	$this->addOption( 'comment-ext',
		'Causes the description for each file to be loaded from a file with the same name, but '
		. 'the extension provided. If a global description is also given, it is appended.',
		false,
		true
	);
	$this->addOption( 'summary',
		'Upload summary, description will be used if not provided',
		false,
		true
	);
	$this->addOption( 'license',
		'Use an optional license template',
		false,
		true
	);
	$this->addOption( 'timestamp',
		'Override upload time/date, all MediaWiki timestamp formats are accepted',
		false,
		true
	);
	$this->addOption( 'protect',
		'Specify the protect value (autoconfirmed,sysop)',
		false,
		true
	);
	$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
	// The example URL in the help text previously lacked its closing double quote.
	$this->addOption( 'source-wiki-url',
		'If specified, take User and Comment data for each imported file from this URL. '
		. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
		false,
		true
	);
	$this->addOption( 'dry', "Dry run, don't import anything" );
}
134 | |
/**
 * Import every suitable file found below the "dir" argument into the local
 * file repository.
 *
 * For each candidate file this:
 *  - derives and validates a File: title from the base name,
 *  - honours --from, --check-userblock, --overwrite and --skip-dupes,
 *  - determines the description text (source wiki, per-file comment file,
 *    or the global comment),
 *  - publishes the file and records the upload (unless --dry is given),
 *  - optionally updates the page's protection,
 * and finally prints the non-zero counters.
 */
public function execute() {
	$services = $this->getServiceContainer();
	$permissionManager = $services->getPermissionManager();

	$found = 0;
	$processed = 0;
	$statistics = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	// Abort only when --protect was passed with a blank value. The previous check
	// was inverted and rejected every non-empty protection level.
	if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
		: $this->getConfig()->get( MainConfigNames::FileExtensions );

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
	if ( !$files->valid() ) {
		$this->output( "No suitable files could be found for import.\n" );
		return;
	}

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		// Fall back to the maintenance system user when no User object was obtained
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}
	'@phan-var User $user';
	StubGlobalUser::setUser( $user );

	# Get block check. If a value is given, this specified how often the check is performed
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	// Flags that do not change while the batch runs
	$dryRun = $this->hasOption( 'dry' );
	$overwrite = $this->hasOption( 'overwrite' );
	$skipDupes = $this->hasOption( 'skip-dupes' );
	$hasSummary = $this->hasOption( 'summary' );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		if ( $comment === false ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$license = $this->getOption( 'license', '' );
	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	// Only apply the server-side-upload tag when it is among the software-defined tags
	$tags = in_array(
		ChangeTags::TAG_SERVER_SIDE_UPLOAD,
		$services->getChangeTagsStore()->getSoftwareTags()
	)
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	# Batch "upload" operation
	$restrictionStore = $services->getRestrictionStore();
	foreach ( $files as $file ) {
		$found++;
		// Pause between files, but not before the first one that gets processed
		if ( $sleep && ( $processed > 0 ) ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !$title ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		if ( $from ) {
			if ( $from !== $title->getDBkey() ) {
				$statistics['ignored']++;
				continue;
			}
			// Found the requested file, continue from here
			$from = null;
		}

		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$statistics['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $services->getRepoGroup()->getLocalRepo()
			->newFile( $title );
		if ( $image->exists() ) {
			if ( $overwrite ) {
				$this->output( "{$base} exists, overwriting..." );
				$svar = 'overwritten';
			} else {
				$this->output( "{$base} exists, skipping\n" );
				$statistics['skipped']++;
				continue;
			}
		} else {
			if ( $skipDupes ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $repo->findBySha1( $sha1 );
				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$svar = 'added';
		}

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment !== false ? $real_comment : $comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( $realUser === false ) {
					# user does not exist in target wiki
					// Terminate the progress line and count the file as failed,
					// the same way a publish failure is handled below.
					$this->output(
						"failed: user '$real_user' does not exist in target wiki.\n"
					);
					$statistics['failed']++;
					continue;
				}
				StubGlobalUser::setUser( $realUser );
				// NOTE(review): this replaces $user for all following files as well,
				// so a later file with no uploader on the source wiki is attributed
				// to this user - confirm whether that is intended.
				$user = $realUser;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $dryRun ) {
			$this->output(
				" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
			);
		} else {
			$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$statistics['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		if ( !$hasSummary ) {
			// Without an explicit --summary, the page text doubles as the upload summary
			$summary = $commentText;
		}

		if ( $dryRun ) {
			$this->output( "done.\n" );
		} elseif ( $image->recordUpload3(
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
			$archive->value,
			$summary,
			$commentText,
			$user,
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
			$props,
			$timestamp,
			$tags
		)->isOK() ) {
			$this->output( "done.\n" );

			$doProtect = false;

			$protectLevel = $this->getOption( 'protect' );
			$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

			// A level that is not configured in RestrictionLevels is ignored
			if ( $protectLevel && in_array( $protectLevel, $restrictionLevels ) ) {
				$doProtect = true;
			}
			if ( $this->hasOption( 'unprotect' ) ) {
				// An empty level is applied to every restriction type below
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				$this->output( "\nWaiting for replica DBs...\n" );
				// Wait for replica DBs.
				sleep( 2 ); # Why this sleep?
				$this->waitForReplication();

				$this->output( "\nSetting image restrictions ..." );

				$cascade = false;
				$restrictions = [];
				foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = $services->getWikiPageFactory()->newFromTitle( $title );
				$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
				$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
			}
		} else {
			$this->output( "failed. (at recordUpload stage)\n" );
			$svar = 'failed';
		}

		// $svar is 'added', 'overwritten' or 'failed' at this point
		$statistics[$svar]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics
	$this->output( "\n" );
	foreach ( array_merge(
		[
			'Found' => $found,
			'Limit' => $limit,
		],
		$statistics
	) as $desc => $number ) {
		if ( $number > 0 ) {
			$this->output( ucfirst( $desc ) . ": $number\n" );
		}
	}
}
432 | |
/**
 * Lazily list the files in a directory whose extension is in a given set.
 *
 * Yields nothing when $dir is not a directory or cannot be opened. Extensions
 * are lowercased before being compared, so $exts must contain lowercase strings.
 *
 * @param string $dir Path to directory to search
 * @param array $exts Array of lowercase extensions to search for
 * @param bool $recurse Search subdirectories recursively
 * @return \Generator<string> Generator yielding the path of each matching file
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	$handle = is_dir( $dir ) ? opendir( $dir ) : false;
	if ( !$handle ) {
		return;
	}

	// The finally block also runs when the consumer stops iterating early and the
	// generator is destroyed, so the directory handle is always released.
	try {
		// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
		while ( ( $entry = readdir( $handle ) ) !== false ) {
			$path = $dir . '/' . $entry;
			if ( is_file( $path ) ) {
				$ext = strtolower( pathinfo( $entry, PATHINFO_EXTENSION ) );
				// Strict comparison: numeric-looking extensions must match exactly
				if ( in_array( $ext, $exts, true ) ) {
					yield $path;
				}
			} elseif ( $recurse && $entry !== '.' && $entry !== '..' && is_dir( $path ) ) {
				yield from $this->findFiles( $path, $exts, true );
			}
		}
	} finally {
		closedir( $handle );
	}
}
459 | |
/**
 * Locate a companion ("auxiliary") file that sits next to a base file and
 * carries the given extension.
 *
 * The extension is first appended to the full base name. If no such file
 * exists, the last extension of the base name is removed and the lookup is
 * repeated, at most $maxStrip times. For example, for the base file
 * acme.foo.bar and the extension ".txt", $maxStrip = 1 (the default) accepts
 * acme.foo.bar.txt and acme.foo.txt; $maxStrip = 2 additionally accepts
 * acme.txt.
 *
 * @param string $file Base path
 * @param string $auxExtension The extension to be appended to the base path,
 *   with or without a leading dot
 * @param int $maxStrip The maximum number of extensions to strip from the base path (default: 1)
 * @return string|false Path of the first existing candidate, or false if there is none
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = str_starts_with( $auxExtension, '.' ) ? $auxExtension : '.' . $auxExtension;

	$directory = dirname( $file );
	$stem = basename( $file );

	for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
		$candidate = $directory . '/' . $stem . $suffix;
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		// Give up when there is no further extension to strip: either the name
		// has no dot at all, or its only dot is the leading character.
		$dotPos = strrpos( $stem, '.' );
		if ( $dotPos === false || $dotPos === 0 ) {
			return false;
		}

		$stem = substr( $stem, 0, $dotPos );
	}

	return false;
}
500 | |
/**
 * Fetch the upload comment of a file from the source wiki's API.
 *
 * @todo FIXME: Access the api in a better way and performing just one query
 * (preferably batching files too).
 *
 * @param string $wiki_host URL of the directory that contains the source wiki's api.php
 * @param string $file File name without the "File:" prefix
 *
 * @return string|false The comment, or false if it could not be retrieved
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
	return $this->getImageInfoPropFromSourceWiki( $wiki_host, $file, 'comment', __METHOD__ );
}

/**
 * Fetch the name of the user who uploaded a file from the source wiki's API.
 *
 * @param string $wiki_host URL of the directory that contains the source wiki's api.php
 * @param string $file File name without the "File:" prefix
 *
 * @return string|false The user name, or false if it could not be retrieved
 */
private function getFileUserFromSourceWiki( $wiki_host, $file ) {
	return $this->getImageInfoPropFromSourceWiki( $wiki_host, $file, 'user', __METHOD__ );
}

/**
 * Request a single imageinfo property of a file from the source wiki and
 * extract its value from the XML response.
 *
 * @param string $wiki_host URL of the directory that contains the source wiki's api.php
 * @param string $file File name without the "File:" prefix
 * @param string $prop Name of the iiprop to request; also the XML attribute read back
 * @param string $caller Name of the calling method, passed on to the HTTP request
 *
 * @return string|false The entity-decoded attribute value, or false when the
 *   request failed or the response has no matching <ii> element
 */
private function getImageInfoPropFromSourceWiki( $wiki_host, $file, $prop, $caller ) {
	// Tolerate a trailing slash on the base URL so the request does not go to "//api.php"
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=' . $prop;
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], $caller );
	if ( !is_string( $body ) ) {
		// No response body was returned; there is nothing to parse
		return false;
	}

	if ( preg_match( '#<ii ' . $prop . '="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
531 | |
532 | } |
533 | |
// Script bootstrap: name this file's maintenance class, then include the runner.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file; presumably it
// runs $maintClass only when this script is the entry point - confirm.
// @codeCoverageIgnoreStart
$maintClass = ImportImages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd