MediaWiki REL1_39
importImages.php
Go to the documentation of this file.
1<?php
35require_once __DIR__ . '/Maintenance.php';
36
39
41
/**
 * Declare the script description, its single positional argument and all
 * command-line options.
 *
 * Options registered with a fourth argument of true take a value
 * (--name=value); the others are plain flags.
 */
public function __construct() {
    parent::__construct();

    $this->addDescription( 'Imports images and other media files into the wiki' );
    $this->addArg( 'dir', 'Path to the directory containing images to be imported' );

    $this->addOption( 'extensions',
        'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
        false,
        true
    );
    $this->addOption( 'overwrite',
        'Overwrite existing images with the same name (default is to skip them)' );
    $this->addOption( 'limit',
        'Limit the number of images to process. Ignored or skipped images are not counted',
        false,
        true
    );
    $this->addOption( 'from',
        "Ignore all files until the one with the given name. Useful for resuming aborted "
        . "imports. The name should be the file's canonical database form.",
        false,
        true
    );
    $this->addOption( 'skip-dupes',
        'Skip images that were already uploaded under a different name (check SHA1)' );
    $this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
    $this->addOption( 'sleep',
        'Sleep between files. Useful mostly for debugging',
        false,
        true
    );
    $this->addOption( 'user',
        "Set username of uploader, default 'Maintenance script'",
        false,
        true
    );
    // This parameter can optionally have an argument. If none specified, getOption()
    // returns 1 which is precisely what we need.
    $this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
    $this->addOption( 'comment',
        "Set file description, default 'Importing file'",
        false,
        true
    );
    $this->addOption( 'comment-file',
        'Set description to the content of this file',
        false,
        true
    );
    $this->addOption( 'comment-ext',
        'Causes the description for each file to be loaded from a file with the same name, but '
        . 'the extension provided. If a global description is also given, it is appended.',
        false,
        true
    );
    $this->addOption( 'summary',
        'Upload summary, description will be used if not provided',
        false,
        true
    );
    $this->addOption( 'license',
        'Use an optional license template',
        false,
        true
    );
    $this->addOption( 'timestamp',
        'Override upload time/date, all MediaWiki timestamp formats are accepted',
        false,
        true
    );
    $this->addOption( 'protect',
        'Specify the protect value (autoconfirmed,sysop)',
        false,
        true
    );
    $this->addOption( 'unprotect', 'Unprotects all uploaded images' );
    // The example in this help text previously lacked its closing double quote.
    $this->addOption( 'source-wiki-url',
        'If specified, take User and Comment data for each imported file from this URL. '
        . 'For example, --source-wiki-url="http://en.wikipedia.org/"',
        false,
        true
    );
    $this->addOption( 'dry', "Dry run, don't import anything" );
}
127
/**
 * Import every matching file found under the directory argument as a local upload.
 *
 * For each candidate file this builds a File: title, optionally skips it
 * (--from, existing file without --overwrite, --skip-dupes, blocked user),
 * resolves the description text and uploader, publishes the file, records the
 * upload and optionally changes the page protection. A per-outcome summary is
 * printed at the end.
 *
 * Calls fatalError() when --protect and --unprotect are combined, when
 * --protect is given an empty value, or when --comment-file cannot be read.
 */
public function execute() {
    $services = MediaWikiServices::getInstance();
    $permissionManager = $services->getPermissionManager();

    // Counters; $added / $overwritten / $failed are also incremented through $$svar below.
    $processed = $added = $ignored = $skipped = $overwritten = $failed = 0;

    $this->output( "Importing Files\n\n" );

    $dir = $this->getArg( 0 );

    # Check Protection
    if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
        $this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
    }

    // Abort only when --protect was passed with an empty value. The check used to be
    // inverted and rejected every non-empty protection level.
    if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
        $this->fatalError( "You must specify a protection option.\n" );
    }

    # Prepare the list of allowed extensions
    // Entries are trimmed so that "png, jpg" behaves like "png,jpg".
    $extensions = $this->hasOption( 'extensions' )
        ? array_map( 'trim', explode( ',', strtolower( $this->getOption( 'extensions' ) ) ) )
        : $this->getConfig()->get( MainConfigNames::FileExtensions );

    # Search the path provided for candidates for import
    $files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
    if ( !$files ) {
        $this->output( "No suitable files could be found for import.\n" );
        return;
    }

    # Initialise the user for this operation
    $user = $this->hasOption( 'user' )
        ? User::newFromName( $this->getOption( 'user' ) )
        : User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
    if ( !$user instanceof User ) {
        // Fall back to the system user when the given name did not yield a User object.
        $user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
    }
    '@phan-var User $user';
    // NOTE(review): the listing this was reviewed from skips one source line at this
    // point (line 167) - confirm against the canonical file that nothing is lost.

    # Get block check. If a value is given, this specified how often the check is performed
    $checkUserBlock = (int)$this->getOption( 'check-userblock' );

    $from = $this->getOption( 'from' );
    $sleep = (int)$this->getOption( 'sleep' );
    $limit = (int)$this->getOption( 'limit' );
    $timestamp = $this->getOption( 'timestamp', false );

    # Get the upload comment. Provide a default one in case there's no comment given.
    $commentFile = $this->getOption( 'comment-file' );
    if ( $commentFile !== null ) {
        $comment = file_get_contents( $commentFile );
        if ( $comment === false ) {
            $this->fatalError( "failed to read comment file: {$commentFile}\n" );
        }
    } else {
        $comment = $this->getOption( 'comment', 'Importing file' );
    }
    $commentExt = $this->getOption( 'comment-ext' );
    $summary = $this->getOption( 'summary', '' );

    $license = $this->getOption( 'license', '' );

    $sourceWikiUrl = $this->getOption( 'source-wiki-url' );

    // NOTE(review): the head of this ternary was missing from the reviewed listing and is
    // reconstructed from the symbols it references (TAG_SERVER_SIDE_UPLOAD,
    // getSoftwareTags()); the owning class is presumably ChangeTags - confirm.
    $tags = in_array( ChangeTags::TAG_SERVER_SIDE_UPLOAD, ChangeTags::getSoftwareTags() )
        ? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
        : [];

    # Batch "upload" operation
    $lbFactory = $services->getDBLoadBalancerFactory();
    $restrictionStore = $services->getRestrictionStore();
    foreach ( $files as $file ) {
        if ( $sleep && ( $processed > 0 ) ) {
            sleep( $sleep );
        }

        $base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

        # Validate a title
        $title = Title::makeTitleSafe( NS_FILE, $base );
        if ( !is_object( $title ) ) {
            $this->output(
                "{$base} could not be imported; a valid title cannot be produced\n"
            );
            continue;
        }

        if ( $from ) {
            // Ignore files until the requested starting point is reached, then stop filtering.
            if ( $from === $title->getDBkey() ) {
                $from = null;
            } else {
                $ignored++;
                continue;
            }
        }

        if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
            $user->clearInstanceCache( 'name' ); // reload from DB!
            if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
                $this->output(
                    "{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
                );
                $skipped++;
                continue;
            }
        }

        # Check existence
        $image = $services->getRepoGroup()->getLocalRepo()
            ->newFile( $title );
        if ( $image->exists() ) {
            if ( $this->hasOption( 'overwrite' ) ) {
                $this->output( "{$base} exists, overwriting..." );
                $svar = 'overwritten';
            } else {
                $this->output( "{$base} exists, skipping\n" );
                $skipped++;
                continue;
            }
        } else {
            if ( $this->hasOption( 'skip-dupes' ) ) {
                $repo = $image->getRepo();
                # XXX: we end up calculating this again when actually uploading. that sucks.
                // NOTE(review): this assignment was missing from the reviewed listing and is
                // reconstructed from the referenced FSFile::getSha1Base36FromPath() - confirm.
                $sha1 = FSFile::getSha1Base36FromPath( $file );

                $dupes = $repo->findBySha1( $sha1 );

                if ( $dupes ) {
                    $this->output(
                        "{$base} already exists as {$dupes[0]->getName()}, skipping\n"
                    );
                    $skipped++;
                    continue;
                }
            }

            $this->output( "Importing {$base}..." );
            $svar = 'added';
        }

        if ( $sourceWikiUrl ) {
            /* find comment text directly from source wiki, through MW's API */
            $real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
            if ( $real_comment === false ) {
                $commentText = $comment;
            } else {
                $commentText = $real_comment;
            }

            /* find user directly from source wiki, through MW's API */
            $real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
            if ( $real_user === false ) {
                // don't change $wgUser
            } else {
                $realUser = User::newFromName( $real_user );
                if ( $realUser === false ) {
                    # user does not exist in target wiki
                    // Terminate the line and count the file as failed so the output and
                    // the final statistics reflect it.
                    $this->output(
                        "failed: user '$real_user' does not exist in target wiki.\n"
                    );
                    $failed++;
                    continue;
                }
                StubGlobalUser::setUser( $realUser );
                $user = $realUser;
            }
        } else {
            # Find comment text
            $commentText = false;

            if ( $commentExt ) {
                $f = $this->findAuxFile( $file, $commentExt );
                if ( !$f ) {
                    $this->output( " No comment file with extension {$commentExt} found "
                        . "for {$file}, using default comment." );
                } else {
                    $commentText = file_get_contents( $f );
                    if ( !$commentText ) {
                        $this->output(
                            " Failed to load comment file {$f}, using default comment."
                        );
                    }
                }
            }

            if ( !$commentText ) {
                $commentText = $comment;
            }
        }

        # Import the file
        if ( $this->hasOption( 'dry' ) ) {
            $this->output(
                " publishing {$file} by '{$user->getName()}', comment '$commentText'..."
            );
        } else {
            $mwProps = new MWFileProps( $services->getMimeAnalyzer() );
            $props = $mwProps->getPropsFromPath( $file, true );
            $flags = 0;
            $publishOptions = [];
            $handler = MediaHandler::getHandler( $props['mime'] );
            if ( $handler ) {
                $publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
            } else {
                $publishOptions['headers'] = [];
            }
            $archive = $image->publish( $file, $flags, $publishOptions );
            if ( !$archive->isGood() ) {
                $this->output( "failed. (" .
                    $archive->getMessage( false, false, 'en' )->text() .
                    ")\n" );
                $failed++;
                continue;
            }
        }

        $commentText = SpecialUpload::getInitialPageText( $commentText, $license );
        if ( !$this->hasOption( 'summary' ) ) {
            $summary = $commentText;
        }

        if ( $this->hasOption( 'dry' ) ) {
            $this->output( "done.\n" );
        } elseif ( $image->recordUpload3(
            // @phan-suppress-next-line PhanPossiblyUndeclaredVariable
            $archive->value,
            $summary,
            $commentText,
            $user,
            // @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
            $props,
            $timestamp,
            $tags
        )->isOK() ) {
            $this->output( "done.\n" );

            $doProtect = false;

            $protectLevel = $this->getOption( 'protect' );
            $restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

            if ( $protectLevel && in_array( $protectLevel, $restrictionLevels, true ) ) {
                $doProtect = true;
            }
            if ( $this->hasOption( 'unprotect' ) ) {
                // An empty level is applied to every restriction type below.
                $protectLevel = '';
                $doProtect = true;
            }

            if ( $doProtect ) {
                # Protect the file
                $this->output( "\nWaiting for replica DBs...\n" );
                // Wait for replica DBs.
                sleep( 2 ); # Why this sleep?
                $lbFactory->waitForReplication();

                $this->output( "\nSetting image restrictions ..." );

                $cascade = false;
                $restrictions = [];
                foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
                    $restrictions[$type] = $protectLevel;
                }

                $page = $services->getWikiPageFactory()->newFromTitle( $title );
                $status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
                $this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
            }
        } else {
            $this->output( "failed. (at recordUpload stage)\n" );
            $svar = 'failed';
        }

        // $svar names one of the counters initialised at the top ('added',
        // 'overwritten' or 'failed'); bump that one.
        $$svar++;
        $processed++;

        if ( $limit && $processed >= $limit ) {
            break;
        }
    }

    # Print out some statistics
    $this->output( "\n" );
    foreach (
        [
            'Found' => count( $files ),
            'Limit' => $limit,
            'Ignored' => $ignored,
            'Added' => $added,
            'Skipped' => $skipped,
            'Overwritten' => $overwritten,
            'Failed' => $failed,
        ] as $desc => $number
    ) {
        // Zero-valued rows are omitted from the summary.
        if ( $number > 0 ) {
            $this->output( "{$desc}: {$number}\n" );
        }
    }
}
428
/**
 * Collect the files in a directory whose extension is in the allowed list.
 *
 * @param string $dir Directory to scan
 * @param string[] $exts Allowed extensions; compared against the lower-cased
 *  extension of each file, so entries are expected to be lower case
 * @param bool $recurse Whether to descend into subdirectories
 * @return string[] Paths ("$dir/name") of matching files; empty when $dir is
 *  not a directory or cannot be opened
 */
private function findFiles( $dir, $exts, $recurse = false ) {
    if ( !is_dir( $dir ) ) {
        return [];
    }

    $dhl = opendir( $dir );
    if ( !$dhl ) {
        return [];
    }

    $files = [];
    try {
        while ( ( $file = readdir( $dhl ) ) !== false ) {
            $path = $dir . '/' . $file;
            if ( is_file( $path ) ) {
                $ext = pathinfo( $file, PATHINFO_EXTENSION );
                if ( in_array( strtolower( $ext ), $exts, true ) ) {
                    $files[] = $path;
                }
            } elseif ( $recurse && $file !== '.' && $file !== '..' && is_dir( $path ) ) {
                // '.' and '..' are excluded so the scan neither loops nor climbs upwards.
                $files = array_merge( $files, $this->findFiles( $path, $exts, true ) );
            }
        }
    } finally {
        // Release the handle; with recursion one handle is opened per directory.
        closedir( $dhl );
    }

    return $files;
}
461
/**
 * Locate a companion file that sits next to $file and carries $auxExtension.
 *
 * The extension is first appended to the full file name; if that file does
 * not exist, the last ".suffix" of the name is removed and the lookup is
 * retried, at most $maxStrip times.
 *
 * @param string $file Path of the base file
 * @param string $auxExtension Extension of the companion file, with or without leading dot
 * @param int $maxStrip Maximum number of suffixes to strip from the base name
 * @return string|false Path of the companion file, or false if none exists
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
    // Normalise the extension so it always starts with a dot.
    $suffix = strncmp( $auxExtension, '.', 1 ) === 0
        ? $auxExtension
        : '.' . $auxExtension;

    $folder = dirname( $file );
    $stem = basename( $file );

    for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
        $candidate = $folder . '/' . $stem . $suffix;
        if ( file_exists( $candidate ) ) {
            return $candidate;
        }

        // Give up when there is no dot left, or only a leading one.
        $dot = strrpos( $stem, '.' );
        if ( $dot === false || $dot === 0 ) {
            return false;
        }

        $stem = substr( $stem, 0, $dot );
    }

    return false;
}
502
/**
 * Fetch the upload comment of a file from another wiki through its API.
 *
 * @param string $wiki_host URL prefix of the source wiki; "/api.php" is appended to it
 * @param string $file File name without the "File:" prefix
 * @return string|false Entity-decoded comment, or false when the request
 *  failed or the response carried no matching attribute
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
    return $this->getImageInfoAttributeFromSourceWiki( $wiki_host, $file, 'comment', __METHOD__ );
}

/**
 * Fetch the uploader name of a file from another wiki through its API.
 *
 * @param string $wiki_host URL prefix of the source wiki; "/api.php" is appended to it
 * @param string $file File name without the "File:" prefix
 * @return string|false Entity-decoded user name, or false when the request
 *  failed or the response carried no matching attribute
 */
private function getFileUserFromSourceWiki( $wiki_host, $file ) {
    return $this->getImageInfoAttributeFromSourceWiki( $wiki_host, $file, 'user', __METHOD__ );
}

/**
 * Query one imageinfo property of a file on a source wiki and extract it
 * from the XML response.
 *
 * @param string $wiki_host URL prefix of the source wiki
 * @param string $file File name without the "File:" prefix
 * @param string $prop imageinfo property to request ('comment' or 'user');
 *  used both as the iiprop value and as the attribute name to extract
 * @param string $caller Name of the calling method, forwarded to the HTTP request
 * @return string|false Entity-decoded attribute value, or false if unavailable
 */
private function getImageInfoAttributeFromSourceWiki( $wiki_host, $file, $prop, $caller ) {
    $url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
        . rawurlencode( $file ) . '&prop=imageinfo&iiprop=' . $prop;
    $body = MediaWikiServices::getInstance()->getHttpRequestFactory()->get( $url, [], $caller );

    // A failed request yields no string body; do not hand that to preg_match().
    if ( !is_string( $body ) ) {
        return false;
    }

    // Only a self-closing <ii> element whose sole attribute is $prop is recognised.
    if ( preg_match( '#<ii ' . $prop . '="([^"]*)" />#', $body, $matches ) !== 1 ) {
        return false;
    }

    return html_entity_decode( $matches[1] );
}
533
534}
535
// Name the maintenance class defined in this file, then include the runner
// bootstrap. NOTE(review): presumably RUN_MAINTENANCE_IF_MAIN executes $maintClass
// only when this file is the script entry point - confirm against Maintenance.php.
536$maintClass = ImportImages::class;
537require_once RUN_MAINTENANCE_IF_MAIN;
const NS_FILE
Definition Defines.php:70
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
const TAG_SERVER_SIDE_UPLOAD
This tagged edit was performed while importing media files using the importImages.php maintenance script.
static getSoftwareTags( $all=false)
Loads defined core tags, checks for invalid types (if not array), and filters for supported and enabl...
static getSha1Base36FromPath( $path)
Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case encoding,...
Definition FSFile.php:225
execute()
Do the actual work.
__construct()
Default constructor.
MimeMagic helper wrapper.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static getHandler( $type)
Get a MediaHandler for a given MIME type from the instance cache.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
static setUser( $user)
Reset the stub global user to a different "real" user object, while ensuring that any method calls on...
internal since 1.36
Definition User.php:70
static newFromName( $name, $validate='valid')
Definition User.php:598
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition User.php:806
const MAINTENANCE_SCRIPT_USER
Username used for various maintenance scripts.
Definition User.php:116
$maintClass
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition router.php:42
if(!is_readable( $file)) $ext
Definition router.php:48