MediaWiki master
importImages.php
Go to the documentation of this file.
1<?php
35require_once __DIR__ . '/Maintenance.php';
36
42
44
/**
 * Declare the script description, the positional "dir" argument and every
 * command-line option understood by this script.
 *
 * Options registered with the trailing `false, true` pair are optional and
 * take a value; the remaining options are plain flags.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	$this->addOption( 'extensions',
		'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
		false,
		true
	);
	$this->addOption( 'overwrite',
		'Overwrite existing images with the same name (default is to skip them)' );
	$this->addOption( 'limit',
		'Limit the number of images to process. Ignored or skipped images are not counted',
		false,
		true
	);
	$this->addOption( 'from',
		"Ignore all files until the one with the given name. Useful for resuming aborted "
			. "imports. The name should be the file's canonical database form.",
		false,
		true
	);
	$this->addOption( 'skip-dupes',
		'Skip images that were already uploaded under a different name (check SHA1)' );
	$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
	$this->addOption( 'sleep',
		'Sleep between files. Useful mostly for debugging',
		false,
		true
	);
	$this->addOption( 'user',
		"Set username of uploader, default 'Maintenance script'",
		false,
		true
	);
	// This parameter can optionally have an argument. If none specified, getOption()
	// returns 1 which is precisely what we need.
	$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
	$this->addOption( 'comment',
		"Set file description, default 'Importing file'",
		false,
		true
	);
	$this->addOption( 'comment-file',
		'Set description to the content of this file',
		false,
		true
	);
	$this->addOption( 'comment-ext',
		'Causes the description for each file to be loaded from a file with the same name, but '
			. 'the extension provided. If a global description is also given, it is appended.',
		false,
		true
	);
	$this->addOption( 'summary',
		'Upload summary, description will be used if not provided',
		false,
		true
	);
	$this->addOption( 'license',
		'Use an optional license template',
		false,
		true
	);
	$this->addOption( 'timestamp',
		'Override upload time/date, all MediaWiki timestamp formats are accepted',
		false,
		true
	);
	$this->addOption( 'protect',
		'Specify the protect value (autoconfirmed,sysop)',
		false,
		true
	);
	$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
	// The example URL is closed with a matching double quote so the help text
	// can be copied verbatim.
	$this->addOption( 'source-wiki-url',
		'If specified, take User and Comment data for each imported file from this URL. '
			. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
		false,
		true
	);
	$this->addOption( 'dry', "Dry run, don't import anything" );
}
130
/**
 * Import every matching file found under the "dir" argument.
 *
 * For each candidate file this validates the title, honours --from,
 * --check-userblock, --overwrite and --skip-dupes, works out the description
 * text (source wiki, per-file comment file, or the global comment), publishes
 * the file and records the upload, and optionally updates page restrictions.
 * With --dry nothing is published or recorded. A summary of the counters is
 * printed at the end.
 */
public function execute() {
	$services = $this->getServiceContainer();
	$permissionManager = $services->getPermissionManager();

	$found = 0;
	$processed = 0;
	$statistics = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	# Abort only when --protect was passed with an empty level. The previous test
	# was inverted and aborted whenever a level WAS supplied.
	if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
		: $this->getConfig()->get( MainConfigNames::FileExtensions );

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
	if ( !$files->valid() ) {
		$this->output( "No suitable files could be found for import.\n" );
		return;
	}

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		# Fall back to the system user when the given name yields no User object
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}
	'@phan-var User $user';
	StubGlobalUser::setUser( $user );

	# Get block check. If a value is given, this specified how often the check is performed
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		if ( $comment === false || $comment === null ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$license = $this->getOption( 'license', '' );
	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	# Tag the uploads as server-side uploads when that software tag is available.
	# NOTE(review): the head of this assignment was missing from the listing this
	# was recovered from (only the trailing ": [];" survived); confirm the
	# condition against the upstream file.
	$tags = in_array( ChangeTags::TAG_SERVER_SIDE_UPLOAD, ChangeTags::getSoftwareTags() )
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	# Batch "upload" operation
	$restrictionStore = $services->getRestrictionStore();
	foreach ( $files as $file ) {
		$found++;
		if ( $sleep && ( $processed > 0 ) ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !$title ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		if ( $from ) {
			if ( $from !== $title->getDBkey() ) {
				$statistics['ignored']++;
				continue;
			}
			// Found the requested file, continue from here
			$from = null;
		}

		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$statistics['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $services->getRepoGroup()->getLocalRepo()
			->newFile( $title );
		if ( $image->exists() ) {
			if ( $this->hasOption( 'overwrite' ) ) {
				$this->output( "{$base} exists, overwriting..." );
				$svar = 'overwritten';
			} else {
				$this->output( "{$base} exists, skipping\n" );
				$statistics['skipped']++;
				continue;
			}
		} else {
			if ( $this->hasOption( 'skip-dupes' ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $repo->findBySha1( $sha1 );
				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$svar = 'added';
		}

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment !== false ? $real_comment : $comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( $realUser === false ) {
					# user does not exist in target wiki
					$this->output(
						"failed: user '$real_user' does not exist in target wiki."
					);
					continue;
				}
				StubGlobalUser::setUser( $realUser );
				$user = $realUser;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $this->hasOption( 'dry' ) ) {
			$this->output(
				" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
			);
		} else {
			$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$statistics['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		if ( !$this->hasOption( 'summary' ) ) {
			$summary = $commentText;
		}

		if ( $this->hasOption( 'dry' ) ) {
			$this->output( "done.\n" );
		} elseif ( $image->recordUpload3(
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
			$archive->value,
			$summary,
			$commentText,
			$user,
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
			$props,
			$timestamp,
			$tags
		)->isOK() ) {
			$this->output( "done.\n" );

			$doProtect = false;

			$protectLevel = $this->getOption( 'protect' );
			$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

			if ( $protectLevel && in_array( $protectLevel, $restrictionLevels ) ) {
				$doProtect = true;
			}
			if ( $this->hasOption( 'unprotect' ) ) {
				# An empty level is applied to every restriction type below
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				$this->output( "\nWaiting for replica DBs...\n" );
				// Wait for replica DBs.
				sleep( 2 ); # Why this sleep?
				$this->waitForReplication();

				$this->output( "\nSetting image restrictions ..." );

				$cascade = false;
				$restrictions = [];
				foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = $services->getWikiPageFactory()->newFromTitle( $title );
				$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
				$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
			}
		} else {
			$this->output( "failed. (at recordUpload stage)\n" );
			$svar = 'failed';
		}

		$statistics[$svar]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics
	$this->output( "\n" );
	foreach ( array_merge(
		[
			'Found' => $found,
			'Limit' => $limit,
		],
		$statistics
	) as $desc => $number ) {
		# Only non-zero counters are reported
		if ( $number > 0 ) {
			$this->output( ucfirst( $desc ) . ": $number\n" );
		}
	}
}
425
/**
 * Lazily yield the paths of the files in a directory whose extension is allowed.
 *
 * Yields nothing when $dir is not a directory or cannot be opened. The
 * directory handle is released when iteration finishes and also when the
 * caller abandons the generator early.
 *
 * @param string $dir Directory to scan
 * @param array $exts Allowed extensions; each file's extension is lowercased
 *   before the lookup, so entries are expected to be lowercase
 * @param bool $recurse Whether to descend into subdirectories
 * @return Generator Yields "$dir/$file" path strings
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	$dhl = is_dir( $dir ) ? opendir( $dir ) : false;
	if ( !$dhl ) {
		return;
	}

	try {
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				if ( in_array( strtolower( $ext ), $exts ) ) {
					yield $path;
				}
			} elseif ( $recurse && is_dir( $path ) && $file !== '..' && $file !== '.' ) {
				// Skip the self and parent entries so the recursion terminates
				yield from $this->findFiles( $path, $exts, true );
			}
		}
	} finally {
		// Previously the handle was never closed
		closedir( $dhl );
	}
}
451
/**
 * Locate a companion file that sits next to $file and carries $auxExtension.
 *
 * The full file name plus the auxiliary extension is tried first. After each
 * miss the last ".suffix" is removed from the name and the lookup is repeated,
 * at most $maxStrip times. The search stops early once the name has no dot
 * left, or only a leading one.
 *
 * @param string $file Path of the file being imported
 * @param string $auxExtension Extension to look for, with or without the leading dot
 * @param int $maxStrip Maximum number of suffixes to strip from the file name
 * @return string|false Path of the companion file, or false when none exists
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = str_starts_with( $auxExtension, '.' ) ? $auxExtension : '.' . $auxExtension;

	$directory = dirname( $file );
	$stem = basename( $file );

	for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
		$candidate = $directory . '/' . $stem . $suffix;
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		$dotPos = strrpos( $stem, '.' );
		if ( $dotPos === false || $dotPos === 0 ) {
			// Nothing left to strip
			return false;
		}

		$stem = substr( $stem, 0, $dotPos );
	}

	return false;
}
492
/**
 * Fetch the upload comment of a file from another wiki through its api.php.
 *
 * The imageinfo comment is pulled out of the XML response with a regular
 * expression rather than an XML parser.
 *
 * @param string $wiki_host Base URL that "/api.php" is appended to
 * @param string $file File name without the "File:" prefix
 * @return string|false The entity-decoded comment, or false when the request
 *   produced no usable body or the body holds no matching <ii> element
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
	$url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=comment';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );
	// Guard against a non-string result (presumably null on a failed request —
	// confirm) so it is never handed to preg_match().
	if ( !is_string( $body ) || preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
512
/**
 * Fetch the uploader name of a file from another wiki through its api.php.
 *
 * The imageinfo user is pulled out of the XML response with a regular
 * expression rather than an XML parser.
 *
 * @param string $wiki_host Base URL that "/api.php" is appended to
 * @param string $file File name without the "File:" prefix
 * @return string|false The entity-decoded user name, or false when the request
 *   produced no usable body or the body holds no matching <ii> element
 */
private function getFileUserFromSourceWiki( $wiki_host, $file ) {
	$url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=user';
	$body = $this->getServiceContainer()->getHttpRequestFactory()->get( $url, [], __METHOD__ );
	// Guard against a non-string result (presumably null on a failed request —
	// confirm) so it is never handed to preg_match().
	if ( !is_string( $body ) || preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
523
524}
525
// Name the class defined in this file, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that executes $maintClass
// when this script is the entry point — confirm).
$maintClass = ImportImages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
const NS_FILE
Definition Defines.php:70
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
const TAG_SERVER_SIDE_UPLOAD
This tagged edit was performed while importing media files using the importImages....
static getSoftwareTags( $all=false)
Loads defined core tags, checks for invalid types (if not array), and filters for supported and enabl...
static getSha1Base36FromPath( $path)
Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case encoding,...
Definition FSFile.php:225
execute()
Do the actual work.
__construct()
Default constructor.
MimeMagic helper wrapper.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Form for handling uploads and special page.
Stub object for the global user ($wgUser) that makes it possible to change the relevant underlying ob...
Represents a title within MediaWiki.
Definition Title.php:78
internal since 1.36
Definition User.php:93
$maintClass