MediaWiki master
importImages.php
Go to the documentation of this file.
1<?php
21// @codeCoverageIgnoreStart
22require_once __DIR__ . '/Maintenance.php';
23// @codeCoverageIgnoreEnd
24
34
36
/**
 * Declare the script description, its single positional argument and
 * every command line option it understands.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	// One row per option, in the order they are registered:
	// [ option name, help text, whether the option takes a value ].
	// No option is mandatory, so "required" is always false below.
	$optionTable = [
		[
			'extensions',
			'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
			true,
		],
		[
			'overwrite',
			'Overwrite existing images with the same name (default is to skip them)',
			false,
		],
		[
			'limit',
			'Limit the number of images to process. Ignored or skipped images are not counted',
			true,
		],
		[
			'from',
			"Ignore all files until the one with the given name. Useful for resuming aborted "
				. "imports. The name should be the file's canonical database form.",
			true,
		],
		[
			'skip-dupes',
			'Skip images that were already uploaded under a different name (check SHA1)',
			false,
		],
		[ 'search-recursively', 'Search recursively for files in subdirectories', false ],
		[ 'sleep', 'Sleep between files. Useful mostly for debugging', true ],
		[ 'user', "Set username of uploader, default 'Maintenance script'", true ],
		// This parameter can optionally have an argument. If none specified, getOption()
		// returns 1 which is precisely what we need.
		[ 'check-userblock', 'Check if the user got blocked during import', false ],
		[ 'comment', "Set file description, default 'Importing file'", true ],
		[ 'comment-file', 'Set description to the content of this file', true ],
		[
			'comment-ext',
			'Causes the description for each file to be loaded from a file with the same name, but '
				. 'the extension provided. If a global description is also given, it is appended.',
			true,
		],
		[ 'summary', 'Upload summary, description will be used if not provided', true ],
		[ 'license', 'Use an optional license template', true ],
		[
			'timestamp',
			'Override upload time/date, all MediaWiki timestamp formats are accepted',
			true,
		],
		[ 'protect', 'Specify the protect value (autoconfirmed,sysop)', true ],
		[ 'unprotect', 'Unprotects all uploaded images', false ],
		[
			'source-wiki-url',
			'If specified, take User and Comment data for each imported file from this URL. '
				. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
			true,
		],
		[ 'dry', "Dry run, don't import anything", false ],
	];

	foreach ( $optionTable as [ $optionName, $helpText, $takesValue ] ) {
		$this->addOption( $optionName, $helpText, false, $takesValue );
	}
}
122
/**
 * Import every matching file found under the directory given as the first
 * positional argument.
 *
 * For each candidate file the method builds a File: title, applies the
 * --from / --check-userblock / --overwrite / --skip-dupes filters, resolves
 * the description text (and, with --source-wiki-url, the uploader), then
 * publishes the file and records the upload. With --dry nothing is published
 * or recorded. Optionally the resulting page is protected or unprotected.
 * A per-category tally is printed at the end.
 *
 * @return bool False when no importable file was found or when at least one
 *  import failed; true otherwise.
 */
public function execute() {
	$services = $this->getServiceContainer();
	$permissionManager = $services->getPermissionManager();

	$found = 0;
	$processed = 0;
	$statistics = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	// Abort only when --protect was passed WITHOUT a usable value. The check
	// used to be inverted, which rejected every real protection level.
	if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
		: $this->getConfig()->get( MainConfigNames::FileExtensions );

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
	if ( !$files->valid() ) {
		$this->output( "No suitable files could be found for import.\n" );
		return false;
	}

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}
	'@phan-var User $user';

	# Get block check. If a value is given, this specified how often the check is performed
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		if ( $comment === false ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$license = $this->getOption( 'license', '' );
	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	// Only tag uploads when the server-side-upload tag is among the software tags.
	$tags = in_array(
		ChangeTags::TAG_SERVER_SIDE_UPLOAD,
		$this->getServiceContainer()->getChangeTagsStore()->getSoftwareTags()
	)
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	# Batch "upload" operation
	$restrictionStore = $services->getRestrictionStore();
	foreach ( $files as $file ) {
		$found++;
		// Never sleep before the first file that is actually processed.
		if ( $sleep && ( $processed > 0 ) ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !$title ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		if ( $from ) {
			if ( $from !== $title->getDBkey() ) {
				$statistics['ignored']++;
				continue;
			}
			// Found the requested file, continue from here
			$from = null;
		}

		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$statistics['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $services->getRepoGroup()->getLocalRepo()
			->newFile( $title );
		if ( $image->exists() ) {
			if ( $this->hasOption( 'overwrite' ) ) {
				$this->output( "{$base} exists, overwriting..." );
				$svar = 'overwritten';
			} else {
				$this->output( "{$base} exists, skipping\n" );
				$statistics['skipped']++;
				continue;
			}
		} else {
			if ( $this->hasOption( 'skip-dupes' ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $repo->findBySha1( $sha1 );
				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$svar = 'added';
		}

		// Every file starts from the configured uploader. An uploader resolved
		// from the source wiki applies to this file only and must not leak into
		// later iterations whose lookup finds nothing.
		$fileUser = $user;

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment !== false ? $real_comment : $comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( $realUser === false ) {
					# user does not exist in target wiki
					$this->output(
						"failed: user '$real_user' does not exist in target wiki.\n"
					);
					// Count it, so the tally and the exit code reflect the failure.
					$statistics['failed']++;
					continue;
				}
				$fileUser = $realUser;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $this->hasOption( 'dry' ) ) {
			$this->output(
				" publishing {$file} by '{$fileUser->getName()}', comment '$commentText'..."
			);
		} else {
			$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$statistics['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		// Without an explicit --summary the page text doubles as the upload summary.
		if ( !$this->hasOption( 'summary' ) ) {
			$summary = $commentText;
		}

		if ( $this->hasOption( 'dry' ) ) {
			$this->output( "done.\n" );
		} else {
			$uploadStatus = $image->recordUpload3(
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
				$archive->value,
				$summary,
				$commentText,
				$fileUser,
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
				$props,
				$timestamp,
				$tags
			);

			if ( $uploadStatus->isOK() ) {
				$this->output( "done.\n" );

				$doProtect = false;

				$protectLevel = $this->getOption( 'protect' );
				$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

				if ( $protectLevel && in_array( $protectLevel, $restrictionLevels ) ) {
					$doProtect = true;
				}
				// Unprotecting is done by applying an empty restriction level.
				if ( $this->hasOption( 'unprotect' ) ) {
					$protectLevel = '';
					$doProtect = true;
				}

				if ( $doProtect ) {
					# Protect the file
					$this->output( "\nWaiting for replica DBs...\n" );
					// Wait for replica DBs.
					sleep( 2 ); # Why this sleep?
					$this->waitForReplication();

					$this->output( "\nSetting image restrictions ..." );

					$cascade = false;
					$restrictions = [];
					foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
						$restrictions[$type] = $protectLevel;
					}

					$page = $services->getWikiPageFactory()->newFromTitle( $title );
					$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $fileUser );
					$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
				}
			} elseif ( $uploadStatus->hasMessage( 'fileexists-no-change' ) ) {
				$this->output( "skipped. (fileexists-no-change)\n" );
				$svar = 'skipped';
			} else {
				$errors = $uploadStatus->getMessages( 'error' );
				$firstErrorKey = ( $errors !== [] ) ? $errors[0]->getKey() : 'unknown error at recordUpload';
				$this->output( "failed. ($firstErrorKey)\n" );
				$svar = 'failed';
			}
		}

		$statistics[$svar]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics
	$this->output( "\n" );
	foreach ( array_merge(
		[
			'Found' => $found,
			'Limit' => $limit,
		],
		$statistics
	) as $desc => $number ) {
		if ( $number > 0 ) {
			$this->output( ucfirst( $desc ) . ": $number\n" );
		}
	}

	// Return true if there are no failed imports (= zero exit code), or
	// return false if there are any failed imports (= non-zero exit code)
	return $statistics['failed'] === 0;
}
432
/**
 * Lazily yield the paths of all files in a directory whose extension is
 * in the allowed list, optionally descending into subdirectories.
 *
 * Yields nothing when $dir is not a directory or cannot be opened.
 *
 * @param string $dir Directory to scan
 * @param string[] $exts Allowed extensions; each file's extension is
 *  lower-cased before the comparison
 * @param bool $recurse Whether to search subdirectories as well
 * @return Generator Yields "$dir/$file" path strings
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	$dhl = is_dir( $dir ) ? opendir( $dir ) : false;
	if ( !$dhl ) {
		return;
	}

	try {
		// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				// Strict comparison: loose matching treats numeric-looking
				// strings such as "1e1" and "10" as equal.
				if ( in_array( strtolower( $ext ), $exts, true ) ) {
					yield $path;
				}
			} elseif ( $recurse && $file !== '..' && $file !== '.' && is_dir( $path ) ) {
				yield from $this->findFiles( $path, $exts, true );
			}
		}
	} finally {
		// Runs when the listing is exhausted and also when the consumer stops
		// early and the generator is destroyed, so the handle is never leaked.
		closedir( $dhl );
	}
}
459
/**
 * Look for a companion file that sits next to $file and carries the
 * given extension.
 *
 * The full base name plus the auxiliary extension is tried first. While
 * nothing is found, the last ".suffix" is cut off the base name and the
 * lookup is repeated, at most $maxStrip times.
 *
 * @param string $file Path of the main file
 * @param string $auxExtension Extension of the companion file, with or
 *  without the leading dot
 * @param int $maxStrip Maximum number of suffixes to strip from the name
 * @return string|false Path of the companion file, or false if none exists
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = str_starts_with( $auxExtension, '.' )
		? $auxExtension
		: '.' . $auxExtension;

	$directory = dirname( $file );
	$stem = basename( $file );

	for ( $stripsLeft = $maxStrip; $stripsLeft >= 0; $stripsLeft-- ) {
		$candidate = "{$directory}/{$stem}{$suffix}";
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		// Give up when there is no dot left, or when the only dot is the
		// leading one (stripping it would leave an empty name).
		$lastDot = strrpos( $stem, '.' );
		if ( $lastDot === false || $lastDot === 0 ) {
			return false;
		}

		$stem = substr( $stem, 0, $lastDot );
	}

	return false;
}
500
/**
 * Fetch the upload comment of a file from another wiki through that
 * wiki's api.php (action=query, prop=imageinfo, iiprop=comment).
 *
 * @param string $wiki_host Script path URL of the source wiki, e.g.
 *  "https://en.wikipedia.org/w/"; a trailing slash is tolerated
 * @param string $file File name without the "File:" prefix
 * @return string|false The decoded comment, or false when the request
 *  produced no body or the response carries no comment attribute
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
	// Strip trailing slashes so the documented example URL does not turn
	// into ".../w//api.php".
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=comment';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );

	// get() yields null when the request fails; never hand that to preg_match().
	if ( $body === null || preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
520
/**
 * Fetch the uploader name of a file from another wiki through that
 * wiki's api.php (action=query, prop=imageinfo, iiprop=user).
 *
 * @param string $wiki_host Script path URL of the source wiki, e.g.
 *  "https://en.wikipedia.org/w/"; a trailing slash is tolerated
 * @param string $file File name without the "File:" prefix
 * @return string|false The decoded user name, or false when the request
 *  produced no body or the response carries no user attribute
 */
private function getFileUserFromSourceWiki( string $wiki_host, string $file ) {
	// Strip trailing slashes so the documented example URL does not turn
	// into ".../w//api.php".
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=user';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );

	// get() yields null when the request fails; never hand that to preg_match().
	if ( $body === null || preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
532
533}
534
535// @codeCoverageIgnoreStart
536$maintClass = ImportImages::class;
537require_once RUN_MAINTENANCE_IF_MAIN;
538// @codeCoverageIgnoreEnd
const NS_FILE
Definition Defines.php:57
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
execute()
Do the actual work. All child classes will need to implement this. Returns bool|null|void: True for success,...
__construct()
Default constructor.
Recent changes tagging.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Base media handler class.
Form for uploading media files.
Represents a title within MediaWiki.
Definition Title.php:69
User class for the MediaWiki software.
Definition User.php:130
MimeMagic helper wrapper.
Class representing a non-directory file on the file system.
Definition FSFile.php:20
$maintClass