MediaWiki  master
importImages.php
Go to the documentation of this file.
1 <?php
35 require_once __DIR__ . '/Maintenance.php';
36 
41 
/**
 * Maintenance script that imports images and other media files from a local
 * directory into the wiki's local file repository.
 */
class ImportImages extends Maintenance {

	/**
	 * Register the script description, the positional "dir" argument and every
	 * command-line option understood by execute().
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Imports images and other media files into the wiki' );
		$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

		$this->addOption( 'extensions',
			'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
			false,
			true
		);
		$this->addOption( 'overwrite',
			'Overwrite existing images with the same name (default is to skip them)' );
		$this->addOption( 'limit',
			'Limit the number of images to process. Ignored or skipped images are not counted',
			false,
			true
		);
		$this->addOption( 'from',
			"Ignore all files until the one with the given name. Useful for resuming aborted "
				. "imports. The name should be the file's canonical database form.",
			false,
			true
		);
		$this->addOption( 'skip-dupes',
			'Skip images that were already uploaded under a different name (check SHA1)' );
		$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
		$this->addOption( 'sleep',
			'Sleep between files. Useful mostly for debugging',
			false,
			true
		);
		$this->addOption( 'user',
			"Set username of uploader, default 'Maintenance script'",
			false,
			true
		);
		// This parameter can optionally have an argument. If none specified, getOption()
		// returns 1 which is precisely what we need.
		$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
		$this->addOption( 'comment',
			"Set file description, default 'Importing file'",
			false,
			true
		);
		$this->addOption( 'comment-file',
			'Set description to the content of this file',
			false,
			true
		);
		$this->addOption( 'comment-ext',
			'Causes the description for each file to be loaded from a file with the same name, but '
				. 'the extension provided. If a global description is also given, it is appended.',
			false,
			true
		);
		$this->addOption( 'summary',
			'Upload summary, description will be used if not provided',
			false,
			true
		);
		$this->addOption( 'license',
			'Use an optional license template',
			false,
			true
		);
		$this->addOption( 'timestamp',
			'Override upload time/date, all MediaWiki timestamp formats are accepted',
			false,
			true
		);
		$this->addOption( 'protect',
			'Specify the protect value (autoconfirmed,sysop)',
			false,
			true
		);
		$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
		$this->addOption( 'source-wiki-url',
			'If specified, take User and Comment data for each imported file from this URL. '
				. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
			false,
			true
		);
		$this->addOption( 'dry', "Dry run, don't import anything" );
	}

	/**
	 * Walk the given directory, publish every suitable file into the local repo,
	 * record the upload, optionally (un)protect the file page, and finally print
	 * per-outcome statistics.
	 */
	public function execute() {
		$services = MediaWikiServices::getInstance();
		$permissionManager = $services->getPermissionManager();

		$found = 0;
		$processed = 0;
		$statistics = [
			'ignored' => 0,
			'added' => 0,
			'skipped' => 0,
			'overwritten' => 0,
			'failed' => 0,
		];

		$this->output( "Importing Files\n\n" );

		$dir = $this->getArg( 0 );

		# Check Protection
		if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
			$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
		}

		// Abort only when --protect was passed with an empty/blank value. The previous
		// check was inverted and aborted whenever a real protection level was supplied.
		if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
			$this->fatalError( "You must specify a protection option.\n" );
		}

		# Prepare the list of allowed extensions
		$extensions = $this->hasOption( 'extensions' )
			? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
			: $this->getConfig()->get( MainConfigNames::FileExtensions );

		# Search the path provided for candidates for import
		$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
		if ( !$files->valid() ) {
			$this->output( "No suitable files could be found for import.\n" );
			return;
		}

		# Initialise the user for this operation
		$user = $this->hasOption( 'user' )
			? User::newFromName( $this->getOption( 'user' ) )
			: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
		if ( !$user instanceof User ) {
			// Fall back to the maintenance system user when the requested name is unusable
			$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
		}
		'@phan-var User $user';
		StubGlobalUser::setUser( $user );

		# Get block check. If a value is given, this specified how often the check is performed
		$checkUserBlock = (int)$this->getOption( 'check-userblock' );

		$from = $this->getOption( 'from' );
		$sleep = (int)$this->getOption( 'sleep' );
		$limit = (int)$this->getOption( 'limit' );
		$timestamp = $this->getOption( 'timestamp', false );

		# Get the upload comment. Provide a default one in case there's no comment given.
		$commentFile = $this->getOption( 'comment-file' );
		if ( $commentFile !== null ) {
			$comment = file_get_contents( $commentFile );
			if ( $comment === false ) {
				$this->fatalError( "failed to read comment file: {$commentFile}\n" );
			}
		} else {
			$comment = $this->getOption( 'comment', 'Importing file' );
		}
		$commentExt = $this->getOption( 'comment-ext' );
		$summary = $this->getOption( 'summary', '' );
		$license = $this->getOption( 'license', '' );
		$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

		// Tag the uploads as server-side uploads, but only if that software tag is
		// currently among the tags reported by ChangeTags::getSoftwareTags().
		$tags = in_array( ChangeTags::TAG_SERVER_SIDE_UPLOAD, ChangeTags::getSoftwareTags(), true )
			? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
			: [];

		# Batch "upload" operation
		$restrictionStore = $services->getRestrictionStore();
		foreach ( $files as $file ) {
			$found++;
			if ( $sleep && ( $processed > 0 ) ) {
				sleep( $sleep );
			}

			$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

			# Validate a title
			$title = Title::makeTitleSafe( NS_FILE, $base );
			if ( !$title ) {
				$this->output(
					"{$base} could not be imported; a valid title cannot be produced\n"
				);
				continue;
			}

			if ( $from ) {
				if ( $from !== $title->getDBkey() ) {
					$statistics['ignored']++;
					continue;
				}
				// Found the requested file, continue from here
				$from = null;
			}

			if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
				$user->clearInstanceCache( 'name' ); // reload from DB!
				if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
					$this->output(
						"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			# Check existence
			$image = $services->getRepoGroup()->getLocalRepo()
				->newFile( $title );
			if ( $image->exists() ) {
				if ( $this->hasOption( 'overwrite' ) ) {
					$this->output( "{$base} exists, overwriting..." );
					$svar = 'overwritten';
				} else {
					$this->output( "{$base} exists, skipping\n" );
					$statistics['skipped']++;
					continue;
				}
			} else {
				if ( $this->hasOption( 'skip-dupes' ) ) {
					$repo = $image->getRepo();
					# XXX: we end up calculating this again when actually uploading. that sucks.
					$sha1 = FSFile::getSha1Base36FromPath( $file );
					$dupes = $repo->findBySha1( $sha1 );
					if ( $dupes ) {
						$this->output(
							"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
						);
						$statistics['skipped']++;
						continue;
					}
				}

				$this->output( "Importing {$base}..." );
				$svar = 'added';
			}

			if ( $sourceWikiUrl ) {
				/* find comment text directly from source wiki, through MW's API */
				$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
				$commentText = $real_comment !== false ? $real_comment : $comment;

				/* find user directly from source wiki, through MW's API */
				$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
				if ( $real_user !== false ) {
					$realUser = User::newFromName( $real_user );
					if ( $realUser === false ) {
						# user does not exist in target wiki
						$this->output(
							"failed: user '$real_user' does not exist in target wiki.\n"
						);
						$statistics['failed']++;
						continue;
					}
					StubGlobalUser::setUser( $realUser );
					// NOTE(review): $user is not reset per file, so when a later lookup
					// returns false the previous file's uploader is reused - confirm intended.
					$user = $realUser;
				}
			} else {
				# Find comment text
				$commentText = false;

				if ( $commentExt ) {
					$f = $this->findAuxFile( $file, $commentExt );
					if ( !$f ) {
						$this->output( " No comment file with extension {$commentExt} found "
							. "for {$file}, using default comment." );
					} else {
						$commentText = file_get_contents( $f );
						if ( !$commentText ) {
							$this->output(
								" Failed to load comment file {$f}, using default comment."
							);
						}
					}
				}

				if ( !$commentText ) {
					$commentText = $comment;
				}
			}

			# Import the file
			if ( $this->hasOption( 'dry' ) ) {
				$this->output(
					" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
				);
			} else {
				$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
				$props = $mwProps->getPropsFromPath( $file, true );
				$flags = 0;
				$publishOptions = [];
				$handler = MediaHandler::getHandler( $props['mime'] );
				if ( $handler ) {
					$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
				} else {
					$publishOptions['headers'] = [];
				}
				$archive = $image->publish( $file, $flags, $publishOptions );
				if ( !$archive->isGood() ) {
					$this->output( "failed. (" .
						$archive->getMessage( false, false, 'en' )->text() .
						")\n" );
					$statistics['failed']++;
					continue;
				}
			}

			$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
			if ( !$this->hasOption( 'summary' ) ) {
				$summary = $commentText;
			}

			if ( $this->hasOption( 'dry' ) ) {
				$this->output( "done.\n" );
			} elseif ( $image->recordUpload3(
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
				$archive->value,
				$summary,
				$commentText,
				$user,
				// @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
				$props,
				$timestamp,
				$tags
			)->isOK() ) {
				$this->output( "done.\n" );

				$doProtect = false;

				$protectLevel = $this->getOption( 'protect' );
				$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

				if ( $protectLevel && in_array( $protectLevel, $restrictionLevels, true ) ) {
					$doProtect = true;
				}
				if ( $this->hasOption( 'unprotect' ) ) {
					// An empty level is applied to every restriction type below
					$protectLevel = '';
					$doProtect = true;
				}

				if ( $doProtect ) {
					# Protect the file
					$this->output( "\nWaiting for replica DBs...\n" );
					// Wait for replica DBs.
					sleep( 2 ); # Why this sleep?
					$this->waitForReplication();

					$this->output( "\nSetting image restrictions ..." );

					$cascade = false;
					$restrictions = [];
					foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
						$restrictions[$type] = $protectLevel;
					}

					$page = $services->getWikiPageFactory()->newFromTitle( $title );
					$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
					$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
				}
			} else {
				$this->output( "failed. (at recordUpload stage)\n" );
				$svar = 'failed';
			}

			$statistics[$svar]++;
			$processed++;

			if ( $limit && $processed >= $limit ) {
				break;
			}
		}

		# Print out some statistics
		$this->output( "\n" );
		foreach ( array_merge(
			[
				'Found' => $found,
				'Limit' => $limit,
			],
			$statistics
		) as $desc => $number ) {
			if ( $number > 0 ) {
				$this->output( ucfirst( $desc ) . ": $number\n" );
			}
		}
	}

	/**
	 * Lazily yield the paths of all files in a directory whose extension is allowed.
	 *
	 * Yields nothing when $dir is not a directory or cannot be opened.
	 *
	 * @param string $dir Path to directory to search
	 * @param string[] $exts Allowed extensions; compared against the lower-cased file extension
	 * @param bool $recurse Whether to descend into subdirectories
	 * @return Generator Yields "<dir>/<file>" path strings
	 */
	private function findFiles( $dir, $exts, $recurse = false ) {
		$dhl = is_dir( $dir ) ? opendir( $dir ) : false;
		if ( !$dhl ) {
			return;
		}

		try {
			while ( ( $file = readdir( $dhl ) ) !== false ) {
				$path = $dir . '/' . $file;
				if ( is_file( $path ) ) {
					$ext = pathinfo( $file, PATHINFO_EXTENSION );
					if ( in_array( strtolower( $ext ), $exts, true ) ) {
						yield $path;
					}
				} elseif ( $recurse && $file !== '.' && $file !== '..' && is_dir( $path ) ) {
					yield from $this->findFiles( $path, $exts, true );
				}
			}
		} finally {
			// Runs when iteration finishes or the generator is destroyed early,
			// so the directory handle is always released.
			closedir( $dhl );
		}
	}

	/**
	 * Find an auxiliary file that sits next to $file and carries $auxExtension.
	 *
	 * The extension is first appended to the full base name ("a.jpg" + ".txt" =>
	 * "a.jpg.txt"); if no such file exists, up to $maxStrip trailing extensions are
	 * stripped from the base name and the lookup is retried ("a.txt").
	 *
	 * @param string $file Path of the base file
	 * @param string $auxExtension Extension of the auxiliary file, with or without leading dot
	 * @param int $maxStrip Maximum number of extensions to strip from the base name
	 * @return string|false Path of the auxiliary file, or false if none was found
	 */
	private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
		if ( !str_starts_with( $auxExtension, '.' ) ) {
			$auxExtension = '.' . $auxExtension;
		}

		$d = dirname( $file );
		$n = basename( $file );

		while ( $maxStrip >= 0 ) {
			$f = $d . '/' . $n . $auxExtension;

			if ( file_exists( $f ) ) {
				return $f;
			}

			// A missing dot (false) or a leading dot (0) both mean there is nothing left to strip
			$idx = strrpos( $n, '.' );
			if ( !$idx ) {
				break;
			}

			$n = substr( $n, 0, $idx );
			$maxStrip -= 1;
		}

		return false;
	}

	/**
	 * Retrieve the upload comment of a file from a source wiki through its API.
	 *
	 * @param string $wiki_host Base URL of the source wiki's script path
	 * @param string $file File name, without the "File:" prefix
	 * @return string|false The comment, or false if it could not be retrieved
	 */
	private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
		return $this->fetchSourceWikiImageInfo( $wiki_host, $file, 'comment', __METHOD__ );
	}

	/**
	 * Retrieve the uploader's user name of a file from a source wiki through its API.
	 *
	 * @param string $wiki_host Base URL of the source wiki's script path
	 * @param string $file File name, without the "File:" prefix
	 * @return string|false The user name, or false if it could not be retrieved
	 */
	private function getFileUserFromSourceWiki( $wiki_host, $file ) {
		return $this->fetchSourceWikiImageInfo( $wiki_host, $file, 'user', __METHOD__ );
	}

	/**
	 * Query one imageinfo property of a file from a source wiki and extract its
	 * value from the XML response.
	 *
	 * @param string $wiki_host Base URL of the source wiki's script path; a trailing "/" is tolerated
	 * @param string $file File name, without the "File:" prefix
	 * @param string $prop imageinfo property to request ("comment" or "user")
	 * @param string $caller Name of the calling method, passed on to the HTTP request
	 * @return string|false Decoded attribute value, or false when the request failed
	 *  or the response did not contain the expected <ii> element
	 */
	private function fetchSourceWikiImageInfo( $wiki_host, $file, $prop, $caller ) {
		// Strip trailing slashes so that ".../w/" does not turn into ".../w//api.php"
		$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
			. rawurlencode( $file ) . '&prop=imageinfo&iiprop=' . $prop;
		$body = MediaWikiServices::getInstance()->getHttpRequestFactory()->get( $url, [], $caller );
		if ( !is_string( $body ) ) {
			// The request failed; there is nothing to parse
			return false;
		}
		if ( !preg_match( '#<ii ' . $prop . '="([^"]*)" />#', $body, $matches ) ) {
			return false;
		}

		return html_entity_decode( $matches[1] );
	}

}
524 
525 $maintClass = ImportImages::class;
526 require_once RUN_MAINTENANCE_IF_MAIN;
const NS_FILE
Definition: Defines.php:70
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
$matches
const TAG_SERVER_SIDE_UPLOAD
This tagged edit was performed while importing media files using the importImages....
Definition: ChangeTags.php:94
static getSoftwareTags( $all=false)
Loads defined core tags, checks for invalid types (if not array), and filters for supported and enabl...
Definition: ChangeTags.php:145
static getSha1Base36FromPath( $path)
Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case encoding,...
Definition: FSFile.php:225
execute()
Do the actual work.
__construct()
Default constructor.
MimeMagic helper wrapper.
Definition: MWFileProps.php:28
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: Maintenance.php:66
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static getHandler( $type)
Get a MediaHandler for a given MIME type from the instance cache.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Stub object for the global user ($wgUser) that makes it possible to change the relevant underlying ob...
Represents a title within MediaWiki.
Definition: Title.php:82
static getInitialPageText( $comment='', $license='', $copyStatus='', $source='', Config $config=null)
Get the initial image page text based on a comment and optional file status information.
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:71
static newFromName( $name, $validate='valid')
Definition: User.php:592
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition: User.php:793
const MAINTENANCE_SCRIPT_USER
Username used for various maintenance scripts.
Definition: User.php:117
$maintClass
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
if(!is_readable( $file)) $ext
Definition: router.php:48