MediaWiki  master
importImages.php
Go to the documentation of this file.
1 <?php
35 require_once __DIR__ . '/Maintenance.php';
36 
38 
/**
 * Maintenance script that imports images and other media files from a
 * directory on the local file system into the wiki.
 *
 * Files are published into the local file repository and recorded as uploads.
 * Optionally the resulting file pages are protected or unprotected afterwards.
 */
class ImportImages extends Maintenance {

	/**
	 * Register the script description, its single positional argument (the
	 * directory to import from) and all supported command line options.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Imports images and other media files into the wiki' );
		$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

		$this->addOption( 'extensions',
			'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
			false,
			true
		);
		$this->addOption( 'overwrite',
			'Overwrite existing images with the same name (default is to skip them)' );
		$this->addOption( 'limit',
			'Limit the number of images to process. Ignored or skipped images are not counted',
			false,
			true
		);
		$this->addOption( 'from',
			"Ignore all files until the one with the given name. Useful for resuming aborted "
				. "imports. The name should be the file's canonical database form.",
			false,
			true
		);
		$this->addOption( 'skip-dupes',
			'Skip images that were already uploaded under a different name (check SHA1)' );
		$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
		$this->addOption( 'sleep',
			'Sleep between files. Useful mostly for debugging',
			false,
			true
		);
		$this->addOption( 'user',
			"Set username of uploader, default 'Maintenance script'",
			false,
			true
		);
		// This parameter can optionally have an argument. If none specified, getOption()
		// returns 1 which is precisely what we need.
		$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
		$this->addOption( 'comment',
			"Set file description, default 'Importing file'",
			false,
			true
		);
		$this->addOption( 'comment-file',
			'Set description to the content of this file',
			false,
			true
		);
		$this->addOption( 'comment-ext',
			'Causes the description for each file to be loaded from a file with the same name, but '
				. 'the extension provided. If a global description is also given, it is appended.',
			false,
			true
		);
		$this->addOption( 'summary',
			'Upload summary, description will be used if not provided',
			false,
			true
		);
		$this->addOption( 'license',
			'Use an optional license template',
			false,
			true
		);
		$this->addOption( 'timestamp',
			'Override upload time/date, all MediaWiki timestamp formats are accepted',
			false,
			true
		);
		$this->addOption( 'protect',
			'Specify the protect value (autoconfirmed,sysop)',
			false,
			true
		);
		$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
		// The value is used as a prefix for "/api.php" (see fetchImageInfoAttribute()),
		// so the example points at the script path and the quote is closed.
		$this->addOption( 'source-wiki-url',
			'If specified, take User and Comment data for each imported file from this URL. '
				. 'For example, --source-wiki-url="https://en.wikipedia.org/w"',
			false,
			true
		);
		$this->addOption( 'dry', "Dry run, don't import anything" );
	}

	/**
	 * Run the import: collect candidate files below the given directory,
	 * then publish and record each one, printing progress and final statistics.
	 */
	public function execute() {
		global $wgFileExtensions, $wgRestrictionLevels;

		$services = \MediaWiki\MediaWikiServices::getInstance();
		$permissionManager = $services->getPermissionManager();

		// These counters are updated and reported through variable variables
		// ($$svar / $$var) further down, so their names must not change.
		$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;

		$this->output( "Importing Files\n\n" );

		$dir = $this->getArg( 0 );

		# Check Protection
		if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
			$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
		}

		// Abort only when --protect was passed with an empty/blank level.
		if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
			$this->fatalError( "You must specify a protection option.\n" );
		}

		# Prepare the list of allowed extensions
		# (entries are trimmed so that "png, jpg" works as expected)
		$extensions = $this->hasOption( 'extensions' )
			? array_map( 'trim', explode( ',', strtolower( $this->getOption( 'extensions' ) ) ) )
			: $wgFileExtensions;

		# Search the path provided for candidates for import
		$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );

		# Initialise the user for this operation
		$user = $this->hasOption( 'user' )
			? User::newFromName( $this->getOption( 'user' ) )
			: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
		if ( !$user instanceof User ) {
			$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
		}
		StubGlobalUser::setUser( $user );

		# Get block check. If a value is given, this specified how often the check is performed
		$checkUserBlock = (int)$this->getOption( 'check-userblock' );

		$from = $this->getOption( 'from' );
		$sleep = (int)$this->getOption( 'sleep' );
		$limit = (int)$this->getOption( 'limit' );
		$timestamp = $this->getOption( 'timestamp', false );

		# Get the upload comment. Provide a default one in case there's no comment given.
		$commentFile = $this->getOption( 'comment-file' );
		if ( $commentFile !== null ) {
			$comment = file_get_contents( $commentFile );
			if ( $comment === false || $comment === null ) {
				$this->fatalError( "failed to read comment file: {$commentFile}\n" );
			}
		} else {
			$comment = $this->getOption( 'comment', 'Importing file' );
		}
		$commentExt = $this->getOption( 'comment-ext' );
		$summary = $this->getOption( 'summary', '' );

		$license = $this->getOption( 'license', '' );

		$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

		# Tag the uploads as server-side uploads when that software tag is available
		$tags = in_array( ChangeTags::TAG_SERVER_SIDE_UPLOAD, ChangeTags::getSoftwareTags() )
			? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
			: [];

		# Batch "upload" operation
		$count = count( $files );
		if ( $count > 0 ) {
			$lbFactory = $services->getDBLoadBalancerFactory();
			foreach ( $files as $file ) {
				if ( $sleep && ( $processed > 0 ) ) {
					sleep( $sleep );
				}

				$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

				# Validate a title
				$title = Title::makeTitleSafe( NS_FILE, $base );
				if ( !is_object( $title ) ) {
					$this->output(
						"{$base} could not be imported; a valid title cannot be produced\n"
					);
					continue;
				}

				# Skip everything before the file named by --from
				if ( $from ) {
					if ( $from === $title->getDBkey() ) {
						$from = null;
					} else {
						$ignored++;
						continue;
					}
				}

				if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
					$user->clearInstanceCache( 'name' ); // reload from DB!
					if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
						$this->output(
							"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
						);
						$skipped++;
						continue;
					}
				}

				# Check existence
				$image = $services->getRepoGroup()->getLocalRepo()
					->newFile( $title );
				if ( $image->exists() ) {
					if ( $this->hasOption( 'overwrite' ) ) {
						$this->output( "{$base} exists, overwriting..." );
						$svar = 'overwritten';
					} else {
						$this->output( "{$base} exists, skipping\n" );
						$skipped++;
						continue;
					}
				} else {
					if ( $this->hasOption( 'skip-dupes' ) ) {
						$repo = $image->getRepo();
						# XXX: we end up calculating this again when actually uploading. that sucks.
						$sha1 = FSFile::getSha1Base36FromPath( $file );

						$dupes = $repo->findBySha1( $sha1 );

						if ( $dupes ) {
							$this->output(
								"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
							);
							$skipped++;
							continue;
						}
					}

					$this->output( "Importing {$base}..." );
					$svar = 'added';
				}

				if ( $sourceWikiUrl ) {
					/* find comment text directly from source wiki, through MW's API */
					$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
					if ( $real_comment === false ) {
						$commentText = $comment;
					} else {
						$commentText = $real_comment;
					}

					/* find user directly from source wiki, through MW's API */
					$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
					if ( $real_user === false ) {
						// don't change $wgUser
					} else {
						$realUser = User::newFromName( $real_user );
						// Same test as for --user above, so any non-User result is rejected.
						if ( !$realUser instanceof User ) {
							# user does not exist in target wiki
							$this->output(
								"failed: user '$real_user' does not exist in target wiki.\n"
							);
							$failed++;
							continue;
						}
						StubGlobalUser::setUser( $realUser );
						$user = $realUser;
					}
				} else {
					# Find comment text
					$commentText = false;

					if ( $commentExt ) {
						$f = $this->findAuxFile( $file, $commentExt );
						if ( !$f ) {
							$this->output( " No comment file with extension {$commentExt} found "
								. "for {$file}, using default comment." );
						} else {
							$commentText = file_get_contents( $f );
							if ( !$commentText ) {
								$this->output(
									" Failed to load comment file {$f}, using default comment."
								);
							}
						}
					}

					if ( !$commentText ) {
						$commentText = $comment;
					}
				}

				# Import the file
				if ( $this->hasOption( 'dry' ) ) {
					$this->output(
						" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
					);
				} else {
					$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
					$props = $mwProps->getPropsFromPath( $file, true );
					$flags = 0;
					$publishOptions = [];
					$handler = MediaHandler::getHandler( $props['mime'] );
					if ( $handler ) {
						$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
					} else {
						$publishOptions['headers'] = [];
					}
					$archive = $image->publish( $file, $flags, $publishOptions );
					if ( !$archive->isGood() ) {
						$this->output( "failed. (" .
							$archive->getMessage( false, false, 'en' )->text() .
							")\n" );
						$failed++;
						continue;
					}
				}

				$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
				if ( !$this->hasOption( 'summary' ) ) {
					$summary = $commentText;
				}

				// $archive and $props are only set in the non-dry branch above;
				// the dry-run test comes first so they are never read when unset.
				if ( $this->hasOption( 'dry' ) ) {
					$this->output( "done.\n" );
				} elseif ( $image->recordUpload3(
					$archive->value,
					$summary,
					$commentText,
					$user,
					$props,
					$timestamp,
					$tags
				)->isOK() ) {
					$this->output( "done.\n" );

					$doProtect = false;

					$protectLevel = $this->getOption( 'protect' );

					if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels, true ) ) {
						$doProtect = true;
					}
					if ( $this->hasOption( 'unprotect' ) ) {
						$protectLevel = '';
						$doProtect = true;
					}

					if ( $doProtect ) {
						# Protect the file
						$this->output( "\nWaiting for replica DBs...\n" );
						// Wait for replica DBs.
						sleep( 2 ); # Why this sleep?
						$lbFactory->waitForReplication();

						$this->output( "\nSetting image restrictions ..." );

						$cascade = false;
						$restrictions = [];
						foreach ( $title->getRestrictionTypes() as $type ) {
							$restrictions[$type] = $protectLevel;
						}

						$page = $services->getWikiPageFactory()->newFromTitle( $title );
						$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
						$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
					}
				} else {
					$this->output( "failed. (at recordUpload stage)\n" );
					$svar = 'failed';
				}

				// $svar names the counter to bump: 'added', 'overwritten' or 'failed'.
				$$svar++;
				$processed++;

				if ( $limit && $processed >= $limit ) {
					break;
				}
			}

			# Print out some statistics
			$this->output( "\n" );
			foreach (
				[
					'count' => 'Found',
					'limit' => 'Limit',
					'ignored' => 'Ignored',
					'added' => 'Added',
					'skipped' => 'Skipped',
					'overwritten' => 'Overwritten',
					'failed' => 'Failed'
				] as $var => $desc
			) {
				if ( $$var > 0 ) {
					$this->output( "{$desc}: {$$var}\n" );
				}
			}
		} else {
			$this->output( "No suitable files could be found for import.\n" );
		}
	}

	/**
	 * Search a directory for files with one of a set of extensions.
	 *
	 * @param string $dir Path of the directory to search
	 * @param string[] $exts Allowed extensions; the file's extension is
	 *   lower-cased before it is compared against this list
	 * @param bool $recurse Whether to descend into subdirectories
	 * @return string[] Paths ("$dir/$name") of the matching files; an empty
	 *   array if $dir is not a directory or cannot be opened
	 */
	private function findFiles( $dir, $exts, $recurse = false ) {
		if ( !is_dir( $dir ) ) {
			return [];
		}

		$dhl = opendir( $dir );
		if ( !$dhl ) {
			return [];
		}

		$files = [];
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				if ( in_array( strtolower( $ext ), $exts, true ) ) {
					$files[] = $path;
				}
			} elseif ( $recurse && $file !== '.' && $file !== '..' && is_dir( $path ) ) {
				$files = array_merge( $files, $this->findFiles( $path, $exts, true ) );
			}
		}
		// Release the handle; with recursion one handle per directory would otherwise leak.
		closedir( $dhl );

		return $files;
	}

	/**
	 * Find an auxiliary file with the given extension, matching the given base file path.
	 *
	 * The auxiliary extension is first appended to the full file name. If no
	 * such file exists, the last ".suffix" is stripped from the name and the
	 * lookup is repeated, at most $maxStrip times. For example, with the
	 * default $maxStrip = 1, both "acme.foo.bar.txt" and "acme.foo.txt" are
	 * accepted for the file "acme.foo.bar" and the extension "txt".
	 *
	 * @param string $file Path of the base file
	 * @param string $auxExtension Extension of the auxiliary file, with or
	 *   without the leading dot
	 * @param int $maxStrip Maximum number of suffixes to strip from the name
	 * @return string|false Path of the auxiliary file, or false if none exists
	 */
	private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
		if ( strpos( $auxExtension, '.' ) !== 0 ) {
			$auxExtension = '.' . $auxExtension;
		}

		$d = dirname( $file );
		$n = basename( $file );

		while ( $maxStrip >= 0 ) {
			$f = $d . '/' . $n . $auxExtension;

			if ( file_exists( $f ) ) {
				return $f;
			}

			// Stop when there is no dot left, or only a leading one (index 0).
			$idx = strrpos( $n, '.' );
			if ( !$idx ) {
				break;
			}

			$n = substr( $n, 0, $idx );
			$maxStrip -= 1;
		}

		return false;
	}

	/**
	 * Retrieve the upload comment of a file from the source wiki's API.
	 *
	 * @param string $wiki_host URL prefix of the source wiki; "/api.php" is appended
	 * @param string $file Name of the file, without the "File:" prefix
	 * @return string|false The comment, or false if it could not be determined
	 */
	private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
		return $this->fetchImageInfoAttribute( $wiki_host, $file, 'comment', __METHOD__ );
	}

	/**
	 * Retrieve the name of the uploader of a file from the source wiki's API.
	 *
	 * @param string $wiki_host URL prefix of the source wiki; "/api.php" is appended
	 * @param string $file Name of the file, without the "File:" prefix
	 * @return string|false The user name, or false if it could not be determined
	 */
	private function getFileUserFromSourceWiki( $wiki_host, $file ) {
		return $this->fetchImageInfoAttribute( $wiki_host, $file, 'user', __METHOD__ );
	}

	/**
	 * Query one imageinfo property of a file through the source wiki's API
	 * (XML format) and extract it from the `<ii … />` element of the response.
	 *
	 * @param string $wiki_host URL prefix of the source wiki; "/api.php" is appended
	 * @param string $file Name of the file, without the "File:" prefix
	 * @param string $prop imageinfo property to request, e.g. "comment" or "user"
	 * @param string $caller Name of the calling method, passed on to the HTTP request
	 * @return string|false Entity-decoded attribute value, or false if the
	 *   request yielded no usable body or the attribute is missing
	 */
	private function fetchImageInfoAttribute( $wiki_host, $file, $prop, $caller ) {
		// A trailing slash on the configured URL would otherwise produce "//api.php".
		$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
			. rawurlencode( $file ) . '&prop=imageinfo&iiprop=' . rawurlencode( $prop );
		$body = \MediaWiki\MediaWikiServices::getInstance()->getHttpRequestFactory()
			->get( $url, [], $caller );
		// NOTE(review): get() is expected to return a non-string (null) when the
		// request fails - confirm against HttpRequestFactory; guard either way so
		// that preg_match() is never handed a non-string subject.
		if ( !is_string( $body ) ) {
			return false;
		}

		$pattern = '#<ii ' . preg_quote( $prop, '#' ) . '="([^"]*)" />#';
		if ( preg_match( $pattern, $body, $matches ) !== 1 ) {
			return false;
		}

		return html_entity_decode( $matches[1] );
	}

}
532 
// Name the maintenance class for the shared bootstrap code, then include that
// bootstrap (the file path is given by the RUN_MAINTENANCE_IF_MAIN constant).
$maintClass = ImportImages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
SpecialUpload\getInitialPageText
static getInitialPageText( $comment='', $license='', $copyStatus='', $source='', Config $config=null)
Get the initial image page text based on a comment and optional file status information.
Definition: SpecialUpload.php:645
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:193
Maintenance\fatalError
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Definition: Maintenance.php:489
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:329
$wgFileExtensions
$wgFileExtensions
This is the list of preferred extensions for uploading files.
Definition: DefaultSettings.php:1105
wfBaseName
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
Definition: GlobalFunctions.php:2095
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
User\newFromName
static newFromName( $name, $validate='valid')
Definition: User.php:606
StubGlobalUser\setUser
static setUser( $user)
Reset the stub global user to a different "real" user object, while ensuring that any method calls on...
Definition: StubGlobalUser.php:79
ImportImages\__construct
__construct()
Default constructor.
Definition: importImages.php:41
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:59
$base
$base
Definition: generateLocalAutoload.php:11
ImportImages\getFileUserFromSourceWiki
getFileUserFromSourceWiki( $wiki_host, $file)
Definition: importImages.php:520
User\newSystemUser
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition: User.php:809
ImportImages\findFiles
findFiles( $dir, $exts, $recurse=false)
Search a directory for files with one of a set of extensions.
Definition: importImages.php:434
ImportImages\getFileCommentFromSourceWiki
getFileCommentFromSourceWiki( $wiki_host, $file)
Definition: importImages.php:509
$matches
$matches
Definition: NoLocalSettings.php:24
FSFile\getSha1Base36FromPath
static getSha1Base36FromPath( $path)
Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case encoding,...
Definition: FSFile.php:225
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:249
ChangeTags\getSoftwareTags
static getSoftwareTags( $all=false)
Loads defined core tags, checks for invalid types (if not array), and filters for supported and enabl...
Definition: ChangeTags.php:158
$title
$title
Definition: testCompression.php:38
MWFileProps
MimeMagic helper wrapper.
Definition: MWFileProps.php:28
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:676
ChangeTags\TAG_SERVER_SIDE_UPLOAD
const TAG_SERVER_SIDE_UPLOAD
This tagged edit was performed while importing media files using the importImages.php maintenance script.
Definition: ChangeTags.php:90
ImportImages\findAuxFile
findAuxFile( $file, $auxExtension, $maxStrip=1)
Find an auxiliary file with the given extension, matching the given base file path.
Definition: importImages.php:473
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:286
Maintenance\addArg
addArg( $arg, $description, $required=true)
Add some args that are needed.
Definition: Maintenance.php:300
MediaHandler\getHandler
static getHandler( $type)
Get a MediaHandler for a given MIME type from the instance cache.
Definition: MediaHandler.php:53
$wgRestrictionLevels
$wgRestrictionLevels
Rights which can be required for each protection level (via action=protect)
Definition: DefaultSettings.php:6166
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:435
$ext
if(!is_readable( $file)) $ext
Definition: router.php:48
NS_FILE
const NS_FILE
Definition: Defines.php:70
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular option was set.
Definition: Maintenance.php:271
Maintenance\getArg
getArg( $argId=0, $default=null)
Get an argument.
Definition: Maintenance.php:353
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:68
ImportImages\execute
execute()
Do the actual work.
Definition: importImages.php:127
ImportImages
Definition: importImages.php:39
User\MAINTENANCE_SCRIPT_USER
const MAINTENANCE_SCRIPT_USER
Username used for various maintenance scripts.
Definition: User.php:121
$maintClass
$maintClass
Definition: importImages.php:533
$type
$type
Definition: testCompression.php:52