MediaWiki master
importImages.php
Go to the documentation of this file.
1<?php
35// @codeCoverageIgnoreStart
36require_once __DIR__ . '/Maintenance.php';
37// @codeCoverageIgnoreEnd
38
46
48
/**
 * Declare the script's description, its single positional argument and all
 * command-line options.
 *
 * Options registered with a fourth argument of `true` take a value; the
 * others are plain flags.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	// --- File selection ---
	$this->addOption( 'extensions',
		'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
		false,
		true
	);
	$this->addOption( 'overwrite',
		'Overwrite existing images with the same name (default is to skip them)' );
	$this->addOption( 'limit',
		'Limit the number of images to process. Ignored or skipped images are not counted',
		false,
		true
	);
	$this->addOption( 'from',
		"Ignore all files until the one with the given name. Useful for resuming aborted "
			. "imports. The name should be the file's canonical database form.",
		false,
		true
	);
	$this->addOption( 'skip-dupes',
		'Skip images that were already uploaded under a different name (check SHA1)' );
	$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
	$this->addOption( 'sleep',
		'Sleep between files. Useful mostly for debugging',
		false,
		true
	);

	// --- Uploader ---
	$this->addOption( 'user',
		"Set username of uploader, default 'Maintenance script'",
		false,
		true
	);
	// This parameter can optionally have an argument. If none specified, getOption()
	// returns 1 which is precisely what we need.
	$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );

	// --- Description page text and upload summary ---
	$this->addOption( 'comment',
		"Set file description, default 'Importing file'",
		false,
		true
	);
	$this->addOption( 'comment-file',
		'Set description to the content of this file',
		false,
		true
	);
	$this->addOption( 'comment-ext',
		'Causes the description for each file to be loaded from a file with the same name, but '
			. 'the extension provided. If a global description is also given, it is appended.',
		false,
		true
	);
	$this->addOption( 'summary',
		'Upload summary, description will be used if not provided',
		false,
		true
	);
	$this->addOption( 'license',
		'Use an optional license template',
		false,
		true
	);
	$this->addOption( 'timestamp',
		'Override upload time/date, all MediaWiki timestamp formats are accepted',
		false,
		true
	);

	// --- Page protection ---
	$this->addOption( 'protect',
		'Specify the protect value (autoconfirmed,sysop)',
		false,
		true
	);
	$this->addOption( 'unprotect', 'Unprotects all uploaded images' );

	// --- Metadata from another wiki ---
	// The example URL is now properly closed with a double quote; the help
	// text previously ended in the middle of the quoted value.
	$this->addOption( 'source-wiki-url',
		'If specified, take User and Comment data for each imported file from this URL. '
			. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
		false,
		true
	);

	$this->addOption( 'dry', "Dry run, don't import anything" );
}
134
/**
 * Import every matching file found under the directory given as the first
 * argument.
 *
 * For each candidate file this builds a File-namespace title, applies the
 * --from / --check-userblock / --overwrite / --skip-dupes filters, resolves
 * the description text (from --source-wiki-url, a --comment-ext side file,
 * or the global comment), publishes the file, records the upload and
 * optionally changes the page protection. A summary of the counters is
 * printed at the end.
 *
 * With --dry nothing is published or recorded; only the messages are printed.
 */
public function execute() {
	$services = $this->getServiceContainer();
	$permissionManager = $services->getPermissionManager();

	$found = 0;
	$processed = 0;
	$statistics = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	// Abort only when --protect was passed with an empty value. The previous
	// condition was inverted and rejected every non-empty protection level.
	if (
		$this->hasOption( 'protect' ) &&
		trim( (string)$this->getOption( 'protect' ) ) === ''
	) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
		: $this->getConfig()->get( MainConfigNames::FileExtensions );

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
	if ( !$files->valid() ) {
		$this->output( "No suitable files could be found for import.\n" );
		return;
	}

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		// Fall back to the system user when the given name did not yield a User
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}
	'@phan-var User $user';
	StubGlobalUser::setUser( $user );

	# Get block check. If a value is given, this specifies how often the check is performed
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		// file_get_contents() signals failure with false (never null)
		if ( $comment === false ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$license = $this->getOption( 'license', '' );
	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	// Apply the server-side-upload change tag only if it is among the software tags.
	// NOTE(review): the listing this was recovered from had lost the needle and
	// the true-branch of this expression; ChangeTags:: is assumed to be imported
	// by the file's use block - confirm.
	$tags = in_array(
		ChangeTags::TAG_SERVER_SIDE_UPLOAD,
		$services->getChangeTagsStore()->getSoftwareTags()
	)
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	# Batch "upload" operation
	$restrictionStore = $services->getRestrictionStore();
	foreach ( $files as $file ) {
		$found++;
		if ( $sleep && ( $processed > 0 ) ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !$title ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		if ( $from ) {
			if ( $from !== $title->getDBkey() ) {
				$statistics['ignored']++;
				continue;
			}
			// Found the requested file, continue from here
			$from = null;
		}

		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$statistics['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $services->getRepoGroup()->getLocalRepo()
			->newFile( $title );
		if ( $image->exists() ) {
			if ( $this->hasOption( 'overwrite' ) ) {
				$this->output( "{$base} exists, overwriting..." );
				$svar = 'overwritten';
			} else {
				$this->output( "{$base} exists, skipping\n" );
				$statistics['skipped']++;
				continue;
			}
		} else {
			if ( $this->hasOption( 'skip-dupes' ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $repo->findBySha1( $sha1 );
				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$svar = 'added';
		}

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment !== false ? $real_comment : $comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( $realUser === false ) {
					# user does not exist in target wiki
					// Terminate the pending "Importing ..." line and count the
					// file as failed, like the publish-failure path below.
					$this->output(
						"failed: user '$real_user' does not exist in target wiki.\n"
					);
					$statistics['failed']++;
					continue;
				}
				// NOTE(review): this replaces the uploader for all following
				// files too, until another source-wiki user is found - confirm
				// that this carry-over is intended.
				StubGlobalUser::setUser( $realUser );
				$user = $realUser;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $this->hasOption( 'dry' ) ) {
			$this->output(
				" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
			);
		} else {
			$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$statistics['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		if ( !$this->hasOption( 'summary' ) ) {
			// Without an explicit --summary the page text doubles as the upload summary
			$summary = $commentText;
		}

		if ( $this->hasOption( 'dry' ) ) {
			$this->output( "done.\n" );
		} elseif ( $image->recordUpload3(
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
			$archive->value,
			$summary,
			$commentText,
			$user,
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
			$props,
			$timestamp,
			$tags
		)->isOK() ) {
			$this->output( "done.\n" );

			$doProtect = false;

			$protectLevel = $this->getOption( 'protect' );
			$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

			if ( $protectLevel && in_array( $protectLevel, $restrictionLevels ) ) {
				$doProtect = true;
			}
			if ( $this->hasOption( 'unprotect' ) ) {
				// An empty level is applied to every restriction type below
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				$this->output( "\nWaiting for replica DBs...\n" );
				// Wait for replica DBs.
				sleep( 2 ); # Why this sleep?
				$this->waitForReplication();

				$this->output( "\nSetting image restrictions ..." );

				$cascade = false;
				$restrictions = [];
				foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = $services->getWikiPageFactory()->newFromTitle( $title );
				$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
				$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
			}
		} else {
			$this->output( "failed. (at recordUpload stage)\n" );
			$svar = 'failed';
		}

		$statistics[$svar]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics
	$this->output( "\n" );
	foreach ( array_merge(
		[
			'Found' => $found,
			'Limit' => $limit,
		],
		$statistics
	) as $desc => $number ) {
		// Only non-zero counters are reported
		if ( $number > 0 ) {
			$this->output( ucfirst( $desc ) . ": $number\n" );
		}
	}
}
432
/**
 * Lazily yield the paths of files in a directory whose extension is allowed.
 *
 * Yields nothing if $dir is not a directory or cannot be opened. The
 * directory handle is closed when iteration finishes and also when the
 * generator is abandoned early (for example when the caller breaks out of
 * its loop), because the finally block runs on generator destruction.
 *
 * @param string $dir Path of the directory to scan
 * @param string[] $exts Allowed extensions; compared against the lower-cased
 *  extension of each file name
 * @param bool $recurse Whether to descend into subdirectories
 * @return Generator Yields "$dir/$file" path strings
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	$dhl = is_dir( $dir ) ? opendir( $dir ) : false;
	if ( !$dhl ) {
		return;
	}

	try {
		// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				if ( in_array( strtolower( $ext ), $exts ) ) {
					yield $path;
				}
			} elseif ( $recurse && is_dir( $path ) && $file !== '..' && $file !== '.' ) {
				// Skip the self/parent entries to avoid infinite recursion
				yield from $this->findFiles( $path, $exts, true );
			}
		}
	} finally {
		// Previously the handle was never released
		closedir( $dhl );
	}
}
459
/**
 * Locate a side file that belongs to $file and carries the given extension.
 *
 * The candidate "<dir>/<name><auxExtension>" is tried first with the full
 * base name; after each miss the last ".suffix" is removed from the name and
 * the lookup is retried, at most $maxStrip times.
 *
 * @param string $file Path of the main file
 * @param string $auxExtension Extension of the side file, with or without leading dot
 * @param int $maxStrip Maximum number of suffixes to strip from the base name
 * @return string|false Path of the first existing candidate, or false if none exists
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = str_starts_with( $auxExtension, '.' ) ? $auxExtension : '.' . $auxExtension;

	$directory = dirname( $file );
	$stem = basename( $file );

	for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
		$candidate = "{$directory}/{$stem}{$suffix}";
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		// Stop when there is no dot left, or only a leading one (hidden file)
		$dotPos = strrpos( $stem, '.' );
		if ( $dotPos === false || $dotPos === 0 ) {
			return false;
		}

		$stem = substr( $stem, 0, $dotPos );
	}

	return false;
}
500
/**
 * Fetch the upload comment of a file from another wiki through its API.
 *
 * Requests prop=imageinfo with iiprop=comment in XML format and extracts the
 * "comment" attribute of the <ii> element with a regular expression.
 *
 * @param string $wiki_host Base URL of the source wiki; "/api.php" is appended to it
 * @param string $file File name, without the "File:" prefix
 * @return string|false The entity-decoded comment, or false if the request
 *  failed or the response contained no matching <ii> element
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
	$url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=comment';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );
	// The HTTP helper returns a non-string (null) when the request fails;
	// do not hand that to preg_match().
	if ( !is_string( $body ) ) {
		return false;
	}
	if ( preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
520
/**
 * Fetch the name of a file's uploader from another wiki through its API.
 *
 * Requests prop=imageinfo with iiprop=user in XML format and extracts the
 * "user" attribute of the <ii> element with a regular expression.
 *
 * @param string $wiki_host Base URL of the source wiki; "/api.php" is appended to it
 * @param string $file File name, without the "File:" prefix
 * @return string|false The entity-decoded user name, or false if the request
 *  failed or the response contained no matching <ii> element
 */
private function getFileUserFromSourceWiki( $wiki_host, $file ) {
	$url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=user';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );
	// The HTTP helper returns a non-string (null) when the request fails;
	// do not hand that to preg_match().
	if ( !is_string( $body ) ) {
		return false;
	}
	if ( preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
531
532}
533
534// @codeCoverageIgnoreStart
535$maintClass = ImportImages::class;
536require_once RUN_MAINTENANCE_IF_MAIN;
537// @codeCoverageIgnoreEnd
const NS_FILE
Definition Defines.php:71
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
const TAG_SERVER_SIDE_UPLOAD
This tagged edit was performed while importing media files using the importImages.php maintenance script.
execute()
Do the actual work.
__construct()
Default constructor.
MimeMagic helper wrapper.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Form for uploading media files.
Stub object for the global user ($wgUser) that makes it possible to change the relevant underlying object.
Represents a title within MediaWiki.
Definition Title.php:78
internal since 1.36
Definition User.php:93
Class representing a non-directory file on the file system.
Definition FSFile.php:34
$maintClass