MediaWiki master
importImages.php
Go to the documentation of this file.
1<?php
21// @codeCoverageIgnoreStart
22require_once __DIR__ . '/Maintenance.php';
23// @codeCoverageIgnoreEnd
24
33
35
/**
 * Declare the script description, its single positional argument and all
 * command line options.
 *
 * Options registered with a fourth argument of true take a value
 * (--name=value); the others are plain flags.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	$this->addOption( 'extensions',
		'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
		false,
		true
	);
	$this->addOption( 'overwrite',
		'Overwrite existing images with the same name (default is to skip them)' );
	$this->addOption( 'limit',
		'Limit the number of images to process. Ignored or skipped images are not counted',
		false,
		true
	);
	$this->addOption( 'from',
		"Ignore all files until the one with the given name. Useful for resuming aborted "
		. "imports. The name should be the file's canonical database form.",
		false,
		true
	);
	$this->addOption( 'skip-dupes',
		'Skip images that were already uploaded under a different name (check SHA1)' );
	$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
	$this->addOption( 'sleep',
		'Sleep between files. Useful mostly for debugging',
		false,
		true
	);
	$this->addOption( 'user',
		"Set username of uploader, default 'Maintenance script'",
		false,
		true
	);
	// This parameter can optionally have an argument. If none specified, getOption()
	// returns 1 which is precisely what we need.
	$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
	$this->addOption( 'comment',
		"Set file description, default 'Importing file'",
		false,
		true
	);
	$this->addOption( 'comment-file',
		'Set description to the content of this file',
		false,
		true
	);
	$this->addOption( 'comment-ext',
		'Causes the description for each file to be loaded from a file with the same name, but '
		. 'the extension provided. If a global description is also given, it is appended.',
		false,
		true
	);
	$this->addOption( 'summary',
		'Upload summary, description will be used if not provided',
		false,
		true
	);
	$this->addOption( 'license',
		'Use an optional license template',
		false,
		true
	);
	$this->addOption( 'timestamp',
		'Override upload time/date, all MediaWiki timestamp formats are accepted',
		false,
		true
	);
	$this->addOption( 'protect',
		'Specify the protect value (autoconfirmed,sysop)',
		false,
		true
	);
	$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
	// The example URL is closed with a matching double quote so the help
	// text can be copied verbatim onto a command line.
	$this->addOption( 'source-wiki-url',
		'If specified, take User and Comment data for each imported file from this URL. '
		. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
		false,
		true
	);
	$this->addOption( 'dry', "Dry run, don't import anything" );
}
121
/**
 * Import every matching file found under the directory given as the first
 * argument, honouring the options declared in the constructor, and print
 * per-outcome statistics at the end.
 *
 * @return bool False when no candidate files were found or at least one
 *  import failed; true otherwise.
 */
public function execute() {
	$services = $this->getServiceContainer();
	$permissionManager = $services->getPermissionManager();

	$found = 0;
	$processed = 0;
	$statistics = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	// Abort only when --protect was passed with an empty/blank level. (The
	// condition used to be inverted and rejected every non-empty level.)
	if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
		: $this->getConfig()->get( MainConfigNames::FileExtensions );

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
	if ( !$files->valid() ) {
		$this->output( "No suitable files could be found for import.\n" );
		return false;
	}

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		// Fall back to the system user when the given name did not yield a User
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}
	'@phan-var User $user';
	StubGlobalUser::setUser( $user );

	# Get block check. If a value is given, this specified how often the check is performed
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		// file_get_contents() signals failure with false (never null)
		if ( $comment === false ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$license = $this->getOption( 'license', '' );
	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	// Only apply the server-side-upload tag when it is among the software tags
	$tags = in_array(
		ChangeTags::TAG_SERVER_SIDE_UPLOAD,
		$services->getChangeTagsStore()->getSoftwareTags(),
		true
	)
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	# Batch "upload" operation
	$restrictionStore = $services->getRestrictionStore();
	foreach ( $files as $file ) {
		$found++;
		if ( $sleep && ( $processed > 0 ) ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !$title ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		if ( $from ) {
			if ( $from !== $title->getDBkey() ) {
				$statistics['ignored']++;
				continue;
			}
			// Found the requested file, continue from here
			$from = null;
		}

		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$statistics['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $services->getRepoGroup()->getLocalRepo()
			->newFile( $title );
		if ( $image->exists() ) {
			if ( $this->hasOption( 'overwrite' ) ) {
				$this->output( "{$base} exists, overwriting..." );
				$svar = 'overwritten';
			} else {
				$this->output( "{$base} exists, skipping\n" );
				$statistics['skipped']++;
				continue;
			}
		} else {
			if ( $this->hasOption( 'skip-dupes' ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $repo->findBySha1( $sha1 );
				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$svar = 'added';
		}

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment !== false ? $real_comment : $comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( $realUser === false ) {
					# user does not exist in target wiki
					// Terminate the progress line and count the failure so
					// the statistics and the exit code reflect it.
					$this->output(
						"failed: user '$real_user' does not exist in target wiki.\n"
					);
					$statistics['failed']++;
					continue;
				}
				StubGlobalUser::setUser( $realUser );
				$user = $realUser;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $this->hasOption( 'dry' ) ) {
			$this->output(
				" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
			);
		} else {
			$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$statistics['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		if ( !$this->hasOption( 'summary' ) ) {
			$summary = $commentText;
		}

		if ( $this->hasOption( 'dry' ) ) {
			$this->output( "done.\n" );
		} else {
			$uploadStatus = $image->recordUpload3(
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
				$archive->value,
				$summary,
				$commentText,
				$user,
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
				$props,
				$timestamp,
				$tags
			);

			if ( $uploadStatus->isOK() ) {
				$this->output( "done.\n" );

				$doProtect = false;

				$protectLevel = $this->getOption( 'protect' );
				$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

				if ( $protectLevel && in_array( $protectLevel, $restrictionLevels, true ) ) {
					$doProtect = true;
				}
				if ( $this->hasOption( 'unprotect' ) ) {
					// An empty level is applied to every restriction type below
					$protectLevel = '';
					$doProtect = true;
				}

				if ( $doProtect ) {
					# Protect the file
					$this->output( "\nWaiting for replica DBs...\n" );
					// Wait for replica DBs.
					sleep( 2 ); # Why this sleep?
					$this->waitForReplication();

					$this->output( "\nSetting image restrictions ..." );

					$cascade = false;
					$restrictions = [];
					foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
						$restrictions[$type] = $protectLevel;
					}

					$page = $services->getWikiPageFactory()->newFromTitle( $title );
					$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
					$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
				}
			} elseif ( $uploadStatus->hasMessage( 'fileexists-no-change' ) ) {
				$this->output( "skipped. (fileexists-no-change)\n" );
				$svar = 'skipped';
			} else {
				$errors = $uploadStatus->getMessages( 'error' );
				$firstErrorKey = ( $errors !== [] ) ? $errors[0]->getKey() : 'unknown error at recordUpload';
				$this->output( "failed. ($firstErrorKey)\n" );
				$svar = 'failed';
			}
		}

		// $svar names the statistics bucket chosen above for this file
		$statistics[$svar]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics
	$this->output( "\n" );
	foreach ( array_merge(
		[
			'Found' => $found,
			'Limit' => $limit,
		],
		$statistics
	) as $desc => $number ) {
		if ( $number > 0 ) {
			$this->output( ucfirst( $desc ) . ": $number\n" );
		}
	}

	// Return true if there are no failed imports (= zero exit code), or
	// return false if there are any failed imports (= non-zero exit code)
	return $statistics['failed'] === 0;
}
433
/**
 * Lazily yield the paths of the files in a directory whose extension is allowed.
 *
 * Yields nothing when $dir is not a directory or cannot be opened.
 *
 * @param string $dir Directory to scan
 * @param string[] $exts Allowed extensions; the lower-cased extension of each
 *  file is looked up in this list
 * @param bool $recurse Whether to descend into subdirectories
 * @return Generator Yields "$dir/$file" paths as strings
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	$dhl = is_dir( $dir ) ? opendir( $dir ) : false;
	if ( !$dhl ) {
		return;
	}

	// The finally block releases the directory handle both when the listing
	// is exhausted and when the consumer abandons the generator early.
	try {
		// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				if ( in_array( strtolower( $ext ), $exts ) ) {
					yield $path;
				}
			} elseif ( $recurse && $file !== '.' && $file !== '..' && is_dir( $path ) ) {
				yield from $this->findFiles( $path, $exts, true );
			}
		}
	} finally {
		closedir( $dhl );
	}
}
460
/**
 * Locate a companion file that sits next to $file and carries $auxExtension.
 *
 * The auxiliary extension is first appended to the full file name; if no
 * such file exists, the last extension is removed from the name and the
 * lookup is repeated, at most $maxStrip times.
 *
 * @param string $file Path of the main file
 * @param string $auxExtension Extension of the companion file, with or
 *  without a leading dot
 * @param int $maxStrip Maximum number of extensions to strip from the name
 * @return string|false Path of the first existing candidate, or false
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = str_starts_with( $auxExtension, '.' )
		? $auxExtension
		: '.' . $auxExtension;

	$directory = dirname( $file );
	$stem = basename( $file );

	for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
		$candidate = $directory . '/' . $stem . $suffix;
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		// Give up when there is no dot left, or only a leading one
		$dotPos = strrpos( $stem, '.' );
		if ( $dotPos === false || $dotPos === 0 ) {
			return false;
		}

		$stem = substr( $stem, 0, $dotPos );
	}

	return false;
}
501
/**
 * Look up the upload comment of a file on another wiki through its action API.
 *
 * @param string $wiki_host URL prefix of the source wiki; "/api.php" is appended to it
 * @param string $file File name without the "File:" prefix
 * @return string|false The entity-decoded comment, or false when the request
 *  failed or the response contains no matching <ii comment="..." /> element
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
	$url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=comment';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );

	// The HTTP helper yields null when the request fails; never hand that to
	// preg_match(), which expects a string subject.
	if ( !is_string( $body ) ) {
		return false;
	}

	// preg_match() returns 1 only on a successful match (0 = none, false = error)
	if ( preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
521
/**
 * Look up the uploader of a file on another wiki through its action API.
 *
 * @param string $wiki_host URL prefix of the source wiki; "/api.php" is appended to it
 * @param string $file File name without the "File:" prefix
 * @return string|false The entity-decoded user name, or false when the request
 *  failed or the response contains no matching <ii user="..." /> element
 */
private function getFileUserFromSourceWiki( string $wiki_host, string $file ) {
	$url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=user';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );

	// The HTTP helper yields null when the request fails; never hand that to
	// preg_match(), which expects a string subject.
	if ( !is_string( $body ) ) {
		return false;
	}

	// preg_match() returns 1 only on a successful match (0 = none, false = error)
	if ( preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
533
534}
535
536// @codeCoverageIgnoreStart
537$maintClass = ImportImages::class;
538require_once RUN_MAINTENANCE_IF_MAIN;
539// @codeCoverageIgnoreEnd
const NS_FILE
Definition Defines.php:57
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
execute()
Do the actual work. All child classes will need to implement this. Returns bool|null|void: true for success,...
__construct()
Default constructor.
Recent changes tagging.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Form for uploading media files.
Stub object for the global user ($wgUser) that makes it possible to change the relevant underlying ob...
Represents a title within MediaWiki.
Definition Title.php:69
User class for the MediaWiki software.
Definition User.php:130
Class representing a non-directory file on the file system.
Definition FSFile.php:20
$maintClass