MediaWiki  master
importImages.php
Go to the documentation of this file.
1 <?php
35 require_once __DIR__ . '/Maintenance.php';
36 
39 
/**
 * Maintenance script that imports images and other media files found in a
 * local directory into the wiki's local file repository.
 *
 * For every candidate file the script validates a File: title, optionally
 * skips existing files or SHA-1 duplicates, publishes the file, records the
 * upload, and optionally applies or removes page protection.
 */
class ImportImages extends Maintenance {

	/**
	 * Declare the script description, its single positional argument and
	 * all supported command line options.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Imports images and other media files into the wiki' );
		$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

		$this->addOption( 'extensions',
			'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
			false,
			true
		);
		$this->addOption( 'overwrite',
			'Overwrite existing images with the same name (default is to skip them)' );
		$this->addOption( 'limit',
			'Limit the number of images to process. Ignored or skipped images are not counted',
			false,
			true
		);
		$this->addOption( 'from',
			"Ignore all files until the one with the given name. Useful for resuming aborted "
			. "imports. The name should be the file's canonical database form.",
			false,
			true
		);
		$this->addOption( 'skip-dupes',
			'Skip images that were already uploaded under a different name (check SHA1)' );
		$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
		$this->addOption( 'sleep',
			'Sleep between files. Useful mostly for debugging',
			false,
			true
		);
		$this->addOption( 'user',
			"Set username of uploader, default 'Maintenance script'",
			false,
			true
		);
		// This parameter can optionally have an argument. If none specified, getOption()
		// returns 1 which is precisely what we need.
		$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
		$this->addOption( 'comment',
			"Set file description, default 'Importing file'",
			false,
			true
		);
		$this->addOption( 'comment-file',
			'Set description to the content of this file',
			false,
			true
		);
		$this->addOption( 'comment-ext',
			'Causes the description for each file to be loaded from a file with the same name, but '
			. 'the extension provided. If a global description is also given, it is appended.',
			false,
			true
		);
		$this->addOption( 'summary',
			'Upload summary, description will be used if not provided',
			false,
			true
		);
		$this->addOption( 'license',
			'Use an optional license template',
			false,
			true
		);
		$this->addOption( 'timestamp',
			'Override upload time/date, all MediaWiki timestamp formats are accepted',
			false,
			true
		);
		$this->addOption( 'protect',
			'Specify the protect value (autoconfirmed,sysop)',
			false,
			true
		);
		$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
		$this->addOption( 'source-wiki-url',
			'If specified, take User and Comment data for each imported file from this URL. '
			. 'For example, --source-wiki-url="http://en.wikipedia.org/"',
			false,
			true
		);
		$this->addOption( 'dry', "Dry run, don't import anything" );
	}

	/**
	 * Run the import: collect candidate files below the given directory,
	 * process them one by one and print a statistics summary at the end.
	 */
	public function execute() {
		$services = MediaWikiServices::getInstance();
		$permissionManager = $services->getPermissionManager();

		// These counters are also addressed by name through $svar further down,
		// so their names must stay in sync with the values assigned to $svar.
		$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;

		$this->output( "Importing Files\n\n" );

		$dir = $this->getArg( 0 );

		# Check Protection
		if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
			$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
		}

		// Abort only when --protect was passed with an empty value; a non-empty
		// value is the valid case and must not trigger the error.
		if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
			$this->fatalError( "You must specify a protection option.\n" );
		}

		# Prepare the list of allowed extensions
		# (entries are trimmed so that "jpg, png" behaves like "jpg,png")
		$extensions = $this->hasOption( 'extensions' )
			? array_map( 'trim', explode( ',', strtolower( $this->getOption( 'extensions' ) ) ) )
			: $this->getConfig()->get( MainConfigNames::FileExtensions );

		# Search the path provided for candidates for import
		$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
		if ( !$files ) {
			$this->output( "No suitable files could be found for import.\n" );
			return;
		}

		# Initialise the user for this operation
		$user = $this->hasOption( 'user' )
			? User::newFromName( $this->getOption( 'user' ) )
			: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
		if ( !$user instanceof User ) {
			// Fall back to the maintenance system user when no usable user was produced.
			$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
		}
		'@phan-var User $user';
		StubGlobalUser::setUser( $user );

		# Get block check. If a value is given, this specifies how often the check is performed
		$checkUserBlock = (int)$this->getOption( 'check-userblock' );

		$from = $this->getOption( 'from' );
		$sleep = (int)$this->getOption( 'sleep' );
		$limit = (int)$this->getOption( 'limit' );
		$timestamp = $this->getOption( 'timestamp', false );

		# Get the upload comment. Provide a default one in case there's no comment given.
		$commentFile = $this->getOption( 'comment-file' );
		if ( $commentFile !== null ) {
			$comment = file_get_contents( $commentFile );
			if ( $comment === false ) {
				$this->fatalError( "failed to read comment file: {$commentFile}\n" );
			}
		} else {
			$comment = $this->getOption( 'comment', 'Importing file' );
		}
		$commentExt = $this->getOption( 'comment-ext' );
		$summary = $this->getOption( 'summary', '' );

		$license = $this->getOption( 'license', '' );

		$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

		# Tag the uploads as server-side uploads when that software tag is available
		$tags = in_array(
			ChangeTags::TAG_SERVER_SIDE_UPLOAD,
			ChangeTags::getSoftwareTags()
		)
			? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
			: [];

		# Batch "upload" operation
		$lbFactory = $services->getDBLoadBalancerFactory();
		$restrictionStore = $services->getRestrictionStore();
		foreach ( $files as $file ) {
			if ( $sleep && ( $processed > 0 ) ) {
				sleep( $sleep );
			}

			$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

			# Validate a title
			$title = Title::makeTitleSafe( NS_FILE, $base );
			if ( !is_object( $title ) ) {
				$this->output(
					"{$base} could not be imported; a valid title cannot be produced\n"
				);
				continue;
			}

			if ( $from ) {
				// Strict comparison: numeric-looking names must not match loosely.
				if ( $from === $title->getDBkey() ) {
					$from = null;
				} else {
					$ignored++;
					continue;
				}
			}

			if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
				$user->clearInstanceCache( 'name' ); // reload from DB!
				if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
					$this->output(
						"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
					);
					$skipped++;
					continue;
				}
			}

			# Check existence
			$image = $services->getRepoGroup()->getLocalRepo()
				->newFile( $title );
			if ( $image->exists() ) {
				if ( $this->hasOption( 'overwrite' ) ) {
					$this->output( "{$base} exists, overwriting..." );
					$svar = 'overwritten';
				} else {
					$this->output( "{$base} exists, skipping\n" );
					$skipped++;
					continue;
				}
			} else {
				if ( $this->hasOption( 'skip-dupes' ) ) {
					$repo = $image->getRepo();
					# XXX: we end up calculating this again when actually uploading. that sucks.
					$sha1 = FSFile::getSha1Base36FromPath( $file );

					$dupes = $repo->findBySha1( $sha1 );

					if ( $dupes ) {
						$this->output(
							"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
						);
						$skipped++;
						continue;
					}
				}

				$this->output( "Importing {$base}..." );
				$svar = 'added';
			}

			if ( $sourceWikiUrl ) {
				/* find comment text directly from source wiki, through MW's API */
				$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
				$commentText = $real_comment === false ? $comment : $real_comment;

				/* find user directly from source wiki, through MW's API */
				$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
				// When no user could be determined, the current user is left unchanged.
				if ( $real_user !== false ) {
					$realUser = User::newFromName( $real_user );
					if ( $realUser === false ) {
						# user does not exist in target wiki
						$this->output(
							"failed: user '$real_user' does not exist in target wiki.\n"
						);
						$failed++;
						continue;
					}
					StubGlobalUser::setUser( $realUser );
					$user = $realUser;
				}
			} else {
				# Find comment text
				$commentText = false;

				if ( $commentExt ) {
					$f = $this->findAuxFile( $file, $commentExt );
					if ( !$f ) {
						$this->output( " No comment file with extension {$commentExt} found "
							. "for {$file}, using default comment." );
					} else {
						$commentText = file_get_contents( $f );
						if ( !$commentText ) {
							$this->output(
								" Failed to load comment file {$f}, using default comment."
							);
						}
					}
				}

				if ( !$commentText ) {
					$commentText = $comment;
				}
			}

			# Import the file
			if ( $this->hasOption( 'dry' ) ) {
				$this->output(
					" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
				);
			} else {
				$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
				$props = $mwProps->getPropsFromPath( $file, true );
				$flags = 0;
				$publishOptions = [];
				$handler = MediaHandler::getHandler( $props['mime'] );
				if ( $handler ) {
					$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
				} else {
					$publishOptions['headers'] = [];
				}
				$archive = $image->publish( $file, $flags, $publishOptions );
				if ( !$archive->isGood() ) {
					$this->output( "failed. (" .
						$archive->getMessage( false, false, 'en' )->text() .
						")\n" );
					$failed++;
					continue;
				}
			}

			$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
			if ( !$this->hasOption( 'summary' ) ) {
				$summary = $commentText;
			}

			if ( $this->hasOption( 'dry' ) ) {
				$this->output( "done.\n" );
			} elseif ( $image->recordUpload3(
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
				$archive->value,
				$summary,
				$commentText,
				$user,
				// @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
				$props,
				$timestamp,
				$tags
			)->isOK() ) {
				$this->output( "done.\n" );

				$doProtect = false;

				$protectLevel = $this->getOption( 'protect' );
				$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

				if ( $protectLevel && in_array( $protectLevel, $restrictionLevels, true ) ) {
					$doProtect = true;
				}
				if ( $this->hasOption( 'unprotect' ) ) {
					// An empty level is used to lift the restrictions.
					$protectLevel = '';
					$doProtect = true;
				}

				if ( $doProtect ) {
					# Protect the file
					$this->output( "\nWaiting for replica DBs...\n" );
					// Wait for replica DBs.
					sleep( 2 ); # Why this sleep?
					$lbFactory->waitForReplication();

					$this->output( "\nSetting image restrictions ..." );

					$cascade = false;
					$restrictions = [];
					foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
						$restrictions[$type] = $protectLevel;
					}

					$page = $services->getWikiPageFactory()->newFromTitle( $title );
					$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
					$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
				}
			} else {
				$this->output( "failed. (at recordUpload stage)\n" );
				$svar = 'failed';
			}

			// $svar names one of the counters declared at the top of this method
			// ('added', 'overwritten' or 'failed'); bump that counter.
			$$svar++;
			$processed++;

			if ( $limit && $processed >= $limit ) {
				break;
			}
		}

		# Print out some statistics
		$this->output( "\n" );
		foreach (
			[
				'Found' => count( $files ),
				'Limit' => $limit,
				'Ignored' => $ignored,
				'Added' => $added,
				'Skipped' => $skipped,
				'Overwritten' => $overwritten,
				'Failed' => $failed,
			] as $desc => $number
		) {
			// Zero-valued entries are omitted from the summary.
			if ( $number > 0 ) {
				$this->output( "{$desc}: {$number}\n" );
			}
		}
	}

	/**
	 * Search a directory for files with one of the given extensions.
	 *
	 * @param string $dir Path to the directory to search
	 * @param string[] $exts Allowed extensions (compared against the lower-cased file extension)
	 * @param bool $recurse Whether to descend into subdirectories
	 * @return string[] Paths ($dir . '/' . name) of matching files; empty when
	 *  $dir is not a directory or cannot be opened
	 */
	private function findFiles( $dir, $exts, $recurse = false ) {
		if ( !is_dir( $dir ) ) {
			return [];
		}

		$dhl = opendir( $dir );
		if ( !$dhl ) {
			return [];
		}

		$files = [];
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				if ( in_array( strtolower( $ext ), $exts, true ) ) {
					$files[] = $path;
				}
			} elseif ( $recurse && is_dir( $path ) && $file !== '..' && $file !== '.' ) {
				$files = array_merge( $files, $this->findFiles( $path, $exts, true ) );
			}
		}
		// Release the handle; otherwise every visited directory stays open
		// until the script ends.
		closedir( $dhl );

		return $files;
	}

	/**
	 * Find an auxiliary file that sits next to the given file and carries the
	 * given extension.
	 *
	 * The candidate "<name><auxExtension>" is tried first; if it does not
	 * exist, the last extension is stripped from the name and the lookup is
	 * repeated, at most $maxStrip times.
	 *
	 * @param string $file Path of the base file
	 * @param string $auxExtension Extension of the auxiliary file, with or without leading dot
	 * @param int $maxStrip Maximum number of extensions to strip from the base name
	 * @return string|false Path of the auxiliary file, or false if none was found
	 */
	private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
		if ( strpos( $auxExtension, '.' ) !== 0 ) {
			$auxExtension = '.' . $auxExtension;
		}

		$d = dirname( $file );
		$n = basename( $file );

		while ( $maxStrip >= 0 ) {
			$f = $d . '/' . $n . $auxExtension;

			if ( file_exists( $f ) ) {
				return $f;
			}

			$idx = strrpos( $n, '.' );
			// Stop when there is no dot left, or the only dot is the leading one.
			if ( !$idx ) {
				break;
			}

			$n = substr( $n, 0, $idx );
			$maxStrip -= 1;
		}

		return false;
	}

	/**
	 * Fetch the upload comment of a file from a source wiki through its API.
	 *
	 * @param string $wiki_host Base URL of the source wiki ('/api.php' is appended)
	 * @param string $file File name, without the namespace prefix
	 * @return string|false The decoded comment, or false if it could not be obtained
	 */
	private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
		return $this->getImageInfoAttributeFromSourceWiki( $wiki_host, $file, 'comment', __METHOD__ );
	}

	/**
	 * Fetch the name of the uploader of a file from a source wiki through its API.
	 *
	 * @param string $wiki_host Base URL of the source wiki ('/api.php' is appended)
	 * @param string $file File name, without the namespace prefix
	 * @return string|false The decoded user name, or false if it could not be obtained
	 */
	private function getFileUserFromSourceWiki( $wiki_host, $file ) {
		return $this->getImageInfoAttributeFromSourceWiki( $wiki_host, $file, 'user', __METHOD__ );
	}

	/**
	 * Query a single imageinfo property of a file from a source wiki and
	 * extract it from the XML response.
	 *
	 * @param string $wiki_host Base URL of the source wiki ('/api.php' is appended)
	 * @param string $file File name, without the namespace prefix
	 * @param string $prop imageinfo property to request; also the name of the
	 *  XML attribute read from the <ii> element
	 * @param string $caller Caller name passed on to the HTTP request
	 * @return string|false The decoded attribute value, or false when the
	 *  request did not yield a string body or the attribute was not found
	 */
	private function getImageInfoAttributeFromSourceWiki( $wiki_host, $file, $prop, $caller ) {
		$url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
			. rawurlencode( $file ) . '&prop=imageinfo&&iiprop=' . $prop;
		$body = MediaWikiServices::getInstance()->getHttpRequestFactory()->get( $url, [], $caller );
		// A failed request does not produce a string; never hand that to preg_match().
		if ( !is_string( $body ) ) {
			return false;
		}
		if ( preg_match( '#<ii ' . $prop . '="([^"]*)" />#', $body, $matches ) !== 1 ) {
			return false;
		}

		return html_entity_decode( $matches[1] );
	}

}
535 
// Publish the name of this script's class in $maintClass, then include the file
// named by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file runs $maintClass when this script is the
// entry point — confirm against Maintenance.php.
536 $maintClass = ImportImages::class;
537 require_once RUN_MAINTENANCE_IF_MAIN;
const NS_FILE
Definition: Defines.php:70
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
$matches
const TAG_SERVER_SIDE_UPLOAD
This tagged edit was performed while importing media files using the importImages.php maintenance script.
Definition: ChangeTags.php:92
static getSoftwareTags( $all=false)
Loads defined core tags, checks for invalid types (if not array), and filters for supported and enabl...
Definition: ChangeTags.php:157
static getSha1Base36FromPath( $path)
Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case encoding,...
Definition: FSFile.php:225
execute()
Do the actual work.
__construct()
Default constructor.
MimeMagic helper wrapper.
Definition: MWFileProps.php:28
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static getHandler( $type)
Get a MediaHandler for a given MIME type from the instance cache.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
static getInitialPageText( $comment='', $license='', $copyStatus='', $source='', Config $config=null)
Get the initial image page text based on a comment and optional file status information.
static setUser( $user)
Reset the stub global user to a different "real" user object, while ensuring that any method calls on...
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:664
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:70
static newFromName( $name, $validate='valid')
Definition: User.php:598
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition: User.php:806
const MAINTENANCE_SCRIPT_USER
Username used for various maintenance scripts.
Definition: User.php:116
$maintClass
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
if(!is_readable( $file)) $ext
Definition: router.php:48