MediaWiki REL1_37
importImages.php
Go to the documentation of this file.
1<?php
35require_once __DIR__ . '/Maintenance.php';
36
38
40
/**
 * Register the script description, its single positional argument and
 * every command-line option that execute() reads.
 *
 * Options registered with a fourth argument of true take a value; the
 * others are plain flags.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	// --- Selecting which files are considered ---
	$this->addOption(
		'extensions',
		'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
		false,
		true
	);
	$this->addOption(
		'overwrite',
		'Overwrite existing images with the same name (default is to skip them)'
	);
	$this->addOption(
		'limit',
		'Limit the number of images to process. Ignored or skipped images are not counted',
		false,
		true
	);
	$this->addOption(
		'from',
		"Ignore all files until the one with the given name. Useful for resuming aborted "
			. "imports. The name should be the file's canonical database form.",
		false,
		true
	);
	$this->addOption(
		'skip-dupes',
		'Skip images that were already uploaded under a different name (check SHA1)'
	);
	$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
	$this->addOption(
		'sleep',
		'Sleep between files. Useful mostly for debugging',
		false,
		true
	);

	// --- Who uploads ---
	$this->addOption(
		'user',
		"Set username of uploader, default 'Maintenance script'",
		false,
		true
	);
	// This parameter can optionally have an argument. If none specified, getOption()
	// returns 1 which is precisely what we need.
	$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );

	// --- Description page text and upload summary ---
	$this->addOption(
		'comment',
		"Set file description, default 'Importing file'",
		false,
		true
	);
	$this->addOption(
		'comment-file',
		'Set description to the content of this file',
		false,
		true
	);
	$this->addOption(
		'comment-ext',
		'Causes the description for each file to be loaded from a file with the same name, but '
			. 'the extension provided. If a global description is also given, it is appended.',
		false,
		true
	);
	$this->addOption(
		'summary',
		'Upload summary, description will be used if not provided',
		false,
		true
	);
	$this->addOption(
		'license',
		'Use an optional license template',
		false,
		true
	);
	$this->addOption(
		'timestamp',
		'Override upload time/date, all MediaWiki timestamp formats are accepted',
		false,
		true
	);

	// --- Page protection after upload ---
	$this->addOption(
		'protect',
		'Specify the protect value (autoconfirmed,sysop)',
		false,
		true
	);
	$this->addOption( 'unprotect', 'Unprotects all uploaded images' );

	// --- Miscellaneous ---
	// The example in this help text previously lacked its closing double quote.
	$this->addOption(
		'source-wiki-url',
		'If specified, take User and Comment data for each imported file from this URL. '
			. 'For example, --source-wiki-url="http://en.wikipedia.org/"',
		false,
		true
	);
	$this->addOption( 'dry', "Dry run, don't import anything" );
}
126
/**
 * Import every matching file found in the directory given as first argument.
 *
 * For each candidate file this validates the title, honours --from,
 * --check-userblock, --overwrite and --skip-dupes, works out the description
 * text (from the source wiki, a per-file comment file, or the global comment),
 * publishes and records the upload unless --dry is set, and optionally
 * updates the page protection. A summary of the per-outcome counters is
 * printed at the end.
 */
public function execute() {
	global $wgFileExtensions, $wgRestrictionLevels;

	$services = MediaWikiServices::getInstance();
	$permissionManager = $services->getPermissionManager();

	// Per-outcome tallies, reported once the batch is finished.
	$counts = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];
	// Files that reached the upload stage; drives --sleep, --limit and --check-userblock.
	$processed = 0;

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	// Abort only when --protect was passed WITHOUT a usable value. The previous
	// condition was inverted and rejected every non-empty protection level.
	if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? array_map( 'trim', explode( ',', strtolower( $this->getOption( 'extensions' ) ) ) )
		: $wgFileExtensions;

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		// Fall back to the maintenance system user when no usable user was produced.
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}

	# Get block check. If a value is given, this specifies how often the check is performed
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		if ( $comment === false ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$useCommentAsSummary = !$this->hasOption( 'summary' );

	$license = $this->getOption( 'license', '' );

	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	// Tag the uploads as server-side uploads, but only if that software tag is active.
	$tags = in_array( ChangeTags::TAG_SERVER_SIDE_UPLOAD, ChangeTags::getSoftwareTags(), true )
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	// Loop-invariant switches, read once instead of per file.
	$dryRun = $this->hasOption( 'dry' );
	$overwrite = $this->hasOption( 'overwrite' );
	$skipDupes = $this->hasOption( 'skip-dupes' );

	// Protection is applied when a known level was requested, or cleared on --unprotect.
	$protectLevel = $this->getOption( 'protect' );
	$doProtect = $protectLevel && in_array( $protectLevel, $wgRestrictionLevels, true );
	if ( $this->hasOption( 'unprotect' ) ) {
		$protectLevel = '';
		$doProtect = true;
	}

	# Batch "upload" operation
	$count = count( $files );
	if ( $count === 0 ) {
		$this->output( "No suitable files could be found for import.\n" );
		return;
	}

	$lbFactory = $services->getDBLoadBalancerFactory();
	$localRepo = $services->getRepoGroup()->getLocalRepo();
	$mwProps = new MWFileProps( $services->getMimeAnalyzer() );

	foreach ( $files as $file ) {
		if ( $sleep && $processed > 0 ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !is_object( $title ) ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		// --from: ignore everything until the named file is reached, then resume normally.
		if ( $from ) {
			if ( (string)$from === $title->getDBkey() ) {
				$from = null;
			} else {
				$counts['ignored']++;
				continue;
			}
		}

		if ( $checkUserBlock && ( $processed % $checkUserBlock ) === 0 ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$counts['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $localRepo->newFile( $title );
		if ( $image->exists() ) {
			if ( !$overwrite ) {
				$this->output( "{$base} exists, skipping\n" );
				$counts['skipped']++;
				continue;
			}
			$this->output( "{$base} exists, overwriting..." );
			$outcome = 'overwritten';
		} else {
			if ( $skipDupes ) {
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $image->getRepo()->findBySha1( $sha1 );

				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$counts['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$outcome = 'added';
		}

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment === false ? $comment : $real_comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( $realUser === false ) {
					# user does not exist in target wiki
					$this->output(
						"failed: user '$real_user' does not exist in target wiki.\n"
					);
					$counts['failed']++;
					continue;
				}
				// Note: the replaced uploader stays in effect for later files as well.
				StubGlobalUser::setUser( $realUser );
				$user = $realUser;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $dryRun ) {
			$this->output(
				" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
			);
		} else {
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$handler = MediaHandler::getHandler( $props['mime'] );
			$publishOptions = [
				'headers' => $handler ? $handler->getContentHeaders( $props['metadata'] ) : [],
			];
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$counts['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		if ( $useCommentAsSummary ) {
			$summary = $commentText;
		}

		if ( $dryRun ) {
			$this->output( "done.\n" );
		} elseif ( $image->recordUpload3(
			$archive->value,
			$summary,
			$commentText,
			$user,
			$props,
			$timestamp,
			$tags
		)->isOK() ) {
			$this->output( "done.\n" );

			if ( $doProtect ) {
				# Protect the file
				$this->output( "\nWaiting for replica DBs...\n" );
				// Wait for replica DBs.
				sleep( 2 ); # Why this sleep?
				$lbFactory->waitForReplication();

				$this->output( "\nSetting image restrictions ..." );

				// Kept in a variable because it is handed to doUpdateRestrictions() as an argument.
				$cascade = false;
				$restrictions = [];
				foreach ( $title->getRestrictionTypes() as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = $services->getWikiPageFactory()->newFromTitle( $title );
				$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
				$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
			}
		} else {
			$this->output( "failed. (at recordUpload stage)\n" );
			$outcome = 'failed';
		}

		$counts[$outcome]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics
	$this->output( "\n" );
	$stats = [
		'Found' => $count,
		'Limit' => $limit,
		'Ignored' => $counts['ignored'],
		'Added' => $counts['added'],
		'Skipped' => $counts['skipped'],
		'Overwritten' => $counts['overwritten'],
		'Failed' => $counts['failed'],
	];
	foreach ( $stats as $desc => $value ) {
		// Only non-zero figures are worth reporting.
		if ( $value > 0 ) {
			$this->output( "{$desc}: {$value}\n" );
		}
	}
}
425
/**
 * Search a directory for files with one of a set of extensions.
 *
 * @param string $dir Path of the directory to search
 * @param string[] $exts Accepted extensions; each file's extension is lower-cased
 *  before it is compared against this list
 * @param bool $recurse Whether to descend into subdirectories
 * @return string[] Paths ($dir . '/' . name) of the matching files; empty when
 *  $dir is not a directory or cannot be opened
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	if ( !is_dir( $dir ) ) {
		return [];
	}

	$dhl = opendir( $dir );
	if ( !$dhl ) {
		return [];
	}

	$files = [];
	try {
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				if ( in_array( strtolower( $ext ), $exts, true ) ) {
					$files[] = $path;
				}
			} elseif ( $recurse && $file !== '.' && $file !== '..' && is_dir( $path ) ) {
				// Skip the self/parent entries, otherwise the recursion never ends.
				$files = array_merge( $files, $this->findFiles( $path, $exts, true ) );
			}
		}
	} finally {
		// Release the handle; it was previously left open, leaking one per directory visited.
		closedir( $dhl );
	}

	return $files;
}
458
/**
 * Locate a companion ("auxiliary") file that sits in the same directory as
 * $file and carries the extension $auxExtension.
 *
 * The first candidate is built from the complete base name of $file. Every
 * further attempt drops the last dot-suffix from that name, for at most
 * $maxStrip additional attempts.
 *
 * @param string $file Path of the base file
 * @param string $auxExtension Extension to look for; a leading dot is added when absent
 * @param int $maxStrip Maximum number of suffixes to strip from the base name
 * @return string|false Path of the first candidate that exists, or false if none does
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = strpos( $auxExtension, '.' ) === 0
		? $auxExtension
		: '.' . $auxExtension;

	$directory = dirname( $file );
	$stem = basename( $file );

	for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
		$candidate = $directory . '/' . $stem . $suffix;
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		// Nothing left to strip when there is no dot, or the only dot leads the name.
		$dotPos = strrpos( $stem, '.' );
		if ( $dotPos === false || $dotPos === 0 ) {
			return false;
		}

		$stem = substr( $stem, 0, $dotPos );
	}

	return false;
}
499
/**
 * Fetch the upload comment of a file from another wiki through its action API.
 *
 * @param string $wiki_host Base URL of the source wiki; '/api.php' is appended to it
 * @param string $file File name, without the "File:" prefix
 * @return string|false The entity-decoded comment, or false when the request
 *  yielded no body or the body holds no <ii comment="..." /> element
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
	// Tolerate a trailing slash on the base URL so the path does not become "//api.php".
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=comment';
	$body = MediaWikiServices::getInstance()->getHttpRequestFactory()->get( $url, [], __METHOD__ );

	// A failed request does not produce a string; do not hand that to preg_match().
	if ( !is_string( $body ) ) {
		return false;
	}

	if ( !preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
519
/**
 * Fetch the name of the uploader of a file from another wiki through its action API.
 *
 * @param string $wiki_host Base URL of the source wiki; '/api.php' is appended to it
 * @param string $file File name, without the "File:" prefix
 * @return string|false The entity-decoded user name, or false when the request
 *  yielded no body or the body holds no <ii user="..." /> element
 */
private function getFileUserFromSourceWiki( $wiki_host, $file ) {
	// Tolerate a trailing slash on the base URL so the path does not become "//api.php".
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=user';
	$body = MediaWikiServices::getInstance()->getHttpRequestFactory()->get( $url, [], __METHOD__ );

	// A failed request does not produce a string; do not hand that to preg_match().
	if ( !is_string( $body ) ) {
		return false;
	}

	if ( !preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
530
531}
532
// Name the maintenance class defined in this file, then include the runner.
// NOTE(review): presumably the runner only acts when this file is the script
// being executed - confirm against Maintenance.php.
$maintClass = ImportImages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
$wgRestrictionLevels
Rights which can be required for each protection level (via action=protect)
$wgFileExtensions
This is the list of preferred extensions for uploading files.
const NS_FILE
Definition Defines.php:70
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
const TAG_SERVER_SIDE_UPLOAD
This tagged edit was performed while importing media files using the importImages....
static getSoftwareTags( $all=false)
Loads defined core tags, checks for invalid types (if not array), and filters for supported and enabl...
static getSha1Base36FromPath( $path)
Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case encoding,...
Definition FSFile.php:225
execute()
Do the actual work.
getFileCommentFromSourceWiki( $wiki_host, $file)
findFiles( $dir, $exts, $recurse=false)
Search a directory for files with one of a set of extensions.
__construct()
Default constructor.
findAuxFile( $file, $auxExtension, $maxStrip=1)
Find an auxiliary file with the given extension, matching the given base file path.
getFileUserFromSourceWiki( $wiki_host, $file)
MimeMagic helper wrapper.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static getHandler( $type)
Get a MediaHandler for a given MIME type from the instance cache.
MediaWikiServices is the service locator for the application scope of MediaWiki.
static setUser( $user)
Reset the stub global user to a different "real" user object, while ensuring that any method calls on...
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition User.php:69
static newFromName( $name, $validate='valid')
Definition User.php:607
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition User.php:810
const MAINTENANCE_SCRIPT_USER
Username used for various maintenance scripts.
Definition User.php:122
$maintClass
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition router.php:42
if(!is_readable( $file)) $ext
Definition router.php:48