MediaWiki master
importImages.php
Go to the documentation of this file.
1<?php
35// @codeCoverageIgnoreStart
36require_once __DIR__ . '/Maintenance.php';
37// @codeCoverageIgnoreEnd
38
47
49
/**
 * Register the script description, its single positional argument and all
 * command line options.
 *
 * Options registered with a fourth argument of true take a value; the others
 * are plain flags.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	$this->addOption( 'extensions',
		'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
		false,
		true
	);
	$this->addOption( 'overwrite',
		'Overwrite existing images with the same name (default is to skip them)' );
	$this->addOption( 'limit',
		'Limit the number of images to process. Ignored or skipped images are not counted',
		false,
		true
	);
	$this->addOption( 'from',
		"Ignore all files until the one with the given name. Useful for resuming aborted "
		. "imports. The name should be the file's canonical database form.",
		false,
		true
	);
	$this->addOption( 'skip-dupes',
		'Skip images that were already uploaded under a different name (check SHA1)' );
	$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
	$this->addOption( 'sleep',
		'Sleep between files. Useful mostly for debugging',
		false,
		true
	);
	$this->addOption( 'user',
		"Set username of uploader, default 'Maintenance script'",
		false,
		true
	);
	// This parameter can optionally have an argument. If none specified, getOption()
	// returns 1 which is precisely what we need (execute() casts the value to int and
	// uses it as the block-check interval).
	$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
	$this->addOption( 'comment',
		"Set file description, default 'Importing file'",
		false,
		true
	);
	$this->addOption( 'comment-file',
		'Set description to the content of this file',
		false,
		true
	);
	$this->addOption( 'comment-ext',
		'Causes the description for each file to be loaded from a file with the same name, but '
		. 'the extension provided. If a global description is also given, it is appended.',
		false,
		true
	);
	$this->addOption( 'summary',
		'Upload summary, description will be used if not provided',
		false,
		true
	);
	$this->addOption( 'license',
		'Use an optional license template',
		false,
		true
	);
	$this->addOption( 'timestamp',
		'Override upload time/date, all MediaWiki timestamp formats are accepted',
		false,
		true
	);
	$this->addOption( 'protect',
		'Specify the protect value (autoconfirmed,sysop)',
		false,
		true
	);
	$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
	// The example in this help text previously lacked its closing double quote.
	$this->addOption( 'source-wiki-url',
		'If specified, take User and Comment data for each imported file from this URL. '
		. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
		false,
		true
	);
	$this->addOption( 'dry', "Dry run, don't import anything" );
}
135
/**
 * Import every matching file found under the directory given as first argument.
 *
 * For each candidate file the method builds a File-namespace title, applies the
 * --from / --check-userblock / --overwrite / --skip-dupes filters, publishes the
 * file into the local repo (unless --dry is set), records the upload and, when
 * requested, updates the page's protection. A summary of the counters is printed
 * at the end.
 *
 * @return bool False when no candidate file was found or at least one import
 *  failed; true otherwise. The caller turns this into the process exit code.
 */
public function execute() {
	$services = $this->getServiceContainer();
	$permissionManager = $services->getPermissionManager();

	$found = 0;
	$processed = 0;
	$statistics = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	// Abort only when --protect was passed with an empty value. The condition used to
	// be inverted and rejected every non-empty protection level.
	if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
		: $this->getConfig()->get( MainConfigNames::FileExtensions );

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
	if ( !$files->valid() ) {
		$this->output( "No suitable files could be found for import.\n" );
		return false;
	}

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		// Fall back to the system user when the requested name did not yield a User
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}
	'@phan-var User $user';
	StubGlobalUser::setUser( $user );

	# Get block check. If a value is given, it specifies how often the check is performed
	# (every N processed files)
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		// file_get_contents() signals failure with false only
		if ( $comment === false ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$license = $this->getOption( 'license', '' );
	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	// Only apply the server-side-upload tag when it is among the software tags
	$tags = in_array(
		ChangeTags::TAG_SERVER_SIDE_UPLOAD,
		$this->getServiceContainer()->getChangeTagsStore()->getSoftwareTags()
	)
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	# Batch "upload" operation
	$restrictionStore = $services->getRestrictionStore();
	foreach ( $files as $file ) {
		$found++;
		// Never sleep before the first file that is actually processed
		if ( $sleep && ( $processed > 0 ) ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !$title ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		if ( $from ) {
			if ( $from !== $title->getDBkey() ) {
				$statistics['ignored']++;
				continue;
			}
			// Found the requested file, continue from here
			$from = null;
		}

		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$statistics['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $services->getRepoGroup()->getLocalRepo()
			->newFile( $title );
		if ( $image->exists() ) {
			if ( $this->hasOption( 'overwrite' ) ) {
				$this->output( "{$base} exists, overwriting..." );
				// $svar names the statistics counter bumped once this file is done
				$svar = 'overwritten';
			} else {
				$this->output( "{$base} exists, skipping\n" );
				$statistics['skipped']++;
				continue;
			}
		} else {
			if ( $this->hasOption( 'skip-dupes' ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $repo->findBySha1( $sha1 );
				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$svar = 'added';
		}

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment !== false ? $real_comment : $comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( $realUser === false ) {
					# user does not exist in target wiki
					// Count this as a failure and end the output line, so the summary
					// and the exit code reflect it like every other failed import.
					$this->output(
						"failed: user '$real_user' does not exist in target wiki.\n"
					);
					$statistics['failed']++;
					continue;
				}
				StubGlobalUser::setUser( $realUser );
				$user = $realUser;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $this->hasOption( 'dry' ) ) {
			$this->output(
				" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
			);
		} else {
			$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$statistics['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		// Without an explicit --summary the page text doubles as the upload summary
		if ( !$this->hasOption( 'summary' ) ) {
			$summary = $commentText;
		}

		if ( $this->hasOption( 'dry' ) ) {
			$this->output( "done.\n" );
		} elseif ( $image->recordUpload3(
			// $archive and $props are always set here: this branch is only reached
			// when --dry is absent, i.e. after the publish step above ran.
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
			$archive->value,
			$summary,
			$commentText,
			$user,
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
			$props,
			$timestamp,
			$tags
		)->isOK() ) {
			$this->output( "done.\n" );

			$doProtect = false;

			$protectLevel = $this->getOption( 'protect' );
			$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

			if ( $protectLevel && in_array( $protectLevel, $restrictionLevels ) ) {
				$doProtect = true;
			}
			if ( $this->hasOption( 'unprotect' ) ) {
				// An empty level is applied to every restriction type below
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				$this->output( "\nWaiting for replica DBs...\n" );
				// Wait for replica DBs.
				sleep( 2 ); # Why this sleep?
				$this->waitForReplication();

				$this->output( "\nSetting image restrictions ..." );

				$cascade = false;
				$restrictions = [];
				foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = $services->getWikiPageFactory()->newFromTitle( $title );
				$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
				$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
			}
		} else {
			$this->output( "failed. (at recordUpload stage)\n" );
			$svar = 'failed';
		}

		$statistics[$svar]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics (zero counters are omitted)
	$this->output( "\n" );
	foreach ( array_merge(
		[
			'Found' => $found,
			'Limit' => $limit,
		],
		$statistics
	) as $desc => $number ) {
		if ( $number > 0 ) {
			$this->output( ucfirst( $desc ) . ": $number\n" );
		}
	}

	// Return true if there are no failed imports (= zero exit code), or
	// return false if there are any failed imports (= non-zero exit code)
	return $statistics['failed'] === 0;
}
437
/**
 * Lazily list the files below a directory whose extension is allowed.
 *
 * Nothing is yielded when $dir is not a directory or cannot be opened.
 *
 * @param string $dir Directory to scan
 * @param string[] $exts Allowed extensions without the leading dot; each file's
 *  extension is lower-cased before the lookup, so entries must be lower-case to match
 * @param bool $recurse Whether to descend into subdirectories
 * @return Generator Yields the path of each matching file, built as "$dir/<entry>"
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	$dhl = is_dir( $dir ) ? opendir( $dir ) : false;
	if ( !$dhl ) {
		return;
	}

	// The finally block releases the directory handle both when the listing is
	// exhausted and when the consumer stops iterating early.
	try {
		// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				// Strict lookup: extensions are plain strings, never numbers to be juggled
				if ( in_array( strtolower( $ext ), $exts, true ) ) {
					yield $path;
				}
			} elseif ( $recurse && $file !== '.' && $file !== '..' && is_dir( $path ) ) {
				yield from $this->findFiles( $path, $exts, true );
			}
		}
	} finally {
		closedir( $dhl );
	}
}
464
/**
 * Locate a companion file that sits next to $file and carries another extension.
 *
 * The first candidate is the full file name with the auxiliary extension appended.
 * After each miss the last ".suffix" is removed from the name and the lookup is
 * repeated, at most $maxStrip times.
 *
 * @param string $file Path of the file a companion is wanted for
 * @param string $auxExtension Extension of the companion, with or without leading dot
 * @param int $maxStrip How many trailing suffixes may be stripped from the file name
 * @return string|false Path of the first existing candidate, or false if none exists
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = str_starts_with( $auxExtension, '.' ) ? $auxExtension : '.' . $auxExtension;
	$directory = dirname( $file );
	$stem = basename( $file );

	for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
		$candidate = $directory . '/' . $stem . $suffix;
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		// Give up when there is no dot left, or the only dot is the leading one
		$dot = strrpos( $stem, '.' );
		if ( $dot === false || $dot === 0 ) {
			return false;
		}

		$stem = substr( $stem, 0, $dot );
	}

	return false;
}
505
/**
 * Fetch the upload comment of a file from another wiki through its action API.
 *
 * @param string $wiki_host URL of the directory that holds the source wiki's api.php;
 *  a trailing slash is tolerated
 * @param string $file File name without the "File:" prefix
 * @return string|false The comment with HTML entities decoded, or false when the
 *  request failed or the response contains no comment attribute
 */
private function getFileCommentFromSourceWiki( string $wiki_host, string $file ) {
	// Strip trailing slashes so a base URL such as ".../w/" does not produce "//api.php"
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=comment';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );

	// A failed request yields no string body (null per the HttpRequestFactory docs);
	// never hand that to preg_match().
	if ( !is_string( $body ) || !preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
525
/**
 * Fetch the name of a file's uploader from another wiki through its action API.
 *
 * @param string $wiki_host URL of the directory that holds the source wiki's api.php;
 *  a trailing slash is tolerated
 * @param string $file File name without the "File:" prefix
 * @return string|false The user name with HTML entities decoded, or false when the
 *  request failed or the response contains no user attribute
 */
private function getFileUserFromSourceWiki( string $wiki_host, string $file ) {
	// Strip trailing slashes so a base URL such as ".../w/" does not produce "//api.php"
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=user';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );

	// A failed request yields no string body (null per the HttpRequestFactory docs);
	// never hand that to preg_match().
	if ( !is_string( $body ) || !preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
537
538}
539
540// @codeCoverageIgnoreStart
// Script bootstrap: name the maintenance class defined in this file, then include the
// file referenced by RUN_MAINTENANCE_IF_MAIN (that constant is defined outside this
// file; presumably the included code runs $maintClass when this script is the entry
// point — confirm).
541$maintClass = ImportImages::class;
542require_once RUN_MAINTENANCE_IF_MAIN;
543// @codeCoverageIgnoreEnd
const NS_FILE
Definition Defines.php:71
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
execute()
Do the actual work.
__construct()
Default constructor.
MimeMagic helper wrapper.
Recent changes tagging.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Form for uploading media files.
Stub object for the global user ($wgUser) that makes it possible to change the relevant underlying object.
Represents a title within MediaWiki.
Definition Title.php:78
User class for the MediaWiki software.
Definition User.php:121
Class representing a non-directory file on the file system.
Definition FSFile.php:34
$maintClass