MediaWiki  master
importImages.php
Go to the documentation of this file.
1 <?php
35 require_once __DIR__ . '/Maintenance.php';
36 
40 
41 class ImportImages extends Maintenance {
42 
/**
 * Register the script description, its single positional argument and
 * all supported command line options.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Imports images and other media files into the wiki' );
	$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

	$this->addOption( 'extensions',
		'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
		false,
		true
	);
	$this->addOption( 'overwrite',
		'Overwrite existing images with the same name (default is to skip them)' );
	$this->addOption( 'limit',
		'Limit the number of images to process. Ignored or skipped images are not counted',
		false,
		true
	);
	$this->addOption( 'from',
		"Ignore all files until the one with the given name. Useful for resuming aborted "
		. "imports. The name should be the file's canonical database form.",
		false,
		true
	);
	$this->addOption( 'skip-dupes',
		'Skip images that were already uploaded under a different name (check SHA1)' );
	$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
	$this->addOption( 'sleep',
		'Sleep between files. Useful mostly for debugging',
		false,
		true
	);
	$this->addOption( 'user',
		"Set username of uploader, default 'Maintenance script'",
		false,
		true
	);
	// This parameter can optionally have an argument. If none specified, getOption()
	// returns 1 which is precisely what we need.
	$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
	$this->addOption( 'comment',
		"Set file description, default 'Importing file'",
		false,
		true
	);
	$this->addOption( 'comment-file',
		'Set description to the content of this file',
		false,
		true
	);
	$this->addOption( 'comment-ext',
		'Causes the description for each file to be loaded from a file with the same name, but '
		. 'the extension provided. If a global description is also given, it is appended.',
		false,
		true
	);
	$this->addOption( 'summary',
		'Upload summary, description will be used if not provided',
		false,
		true
	);
	$this->addOption( 'license',
		'Use an optional license template',
		false,
		true
	);
	$this->addOption( 'timestamp',
		'Override upload time/date, all MediaWiki timestamp formats are accepted',
		false,
		true
	);
	$this->addOption( 'protect',
		'Specify the protect value (autoconfirmed,sysop)',
		false,
		true
	);
	$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
	// The example previously lacked its closing double quote.
	$this->addOption( 'source-wiki-url',
		'If specified, take User and Comment data for each imported file from this URL. '
		. 'For example, --source-wiki-url="https://en.wikipedia.org/w/"',
		false,
		true
	);
	$this->addOption( 'dry', "Dry run, don't import anything" );
}
128 
/**
 * Import every matching file found under the directory given as the first
 * argument, honouring the options registered in the constructor, and print
 * per-category statistics at the end.
 */
public function execute() {
	$services = MediaWikiServices::getInstance();
	$permissionManager = $services->getPermissionManager();

	$processed = 0;
	$statistics = [
		'ignored' => 0,
		'added' => 0,
		'skipped' => 0,
		'overwritten' => 0,
		'failed' => 0,
	];

	$this->output( "Importing Files\n\n" );

	$dir = $this->getArg( 0 );

	# Check Protection
	if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
		$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	// Abort only when --protect was given WITHOUT a usable level. The previous
	// check was inverted and rejected every non-empty level.
	if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
		$this->fatalError( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	$extensions = $this->hasOption( 'extensions' )
		? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
		: $this->getConfig()->get( MainConfigNames::FileExtensions );

	# Search the path provided for candidates for import
	$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
	if ( !$files ) {
		$this->output( "No suitable files could be found for import.\n" );
		return;
	}

	# Initialise the user for this operation
	$user = $this->hasOption( 'user' )
		? User::newFromName( $this->getOption( 'user' ) )
		: User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	if ( !$user instanceof User ) {
		// Fall back to the maintenance system user when the given name is unusable
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	}
	'@phan-var User $user';
	StubGlobalUser::setUser( $user );

	# Get block check. If a value is given, this specified how often the check is performed
	$checkUserBlock = (int)$this->getOption( 'check-userblock' );

	$from = $this->getOption( 'from' );
	$sleep = (int)$this->getOption( 'sleep' );
	$limit = (int)$this->getOption( 'limit' );
	$timestamp = $this->getOption( 'timestamp', false );

	# Get the upload comment. Provide a default one in case there's no comment given.
	$commentFile = $this->getOption( 'comment-file' );
	if ( $commentFile !== null ) {
		$comment = file_get_contents( $commentFile );
		if ( $comment === false ) {
			$this->fatalError( "failed to read comment file: {$commentFile}\n" );
		}
	} else {
		$comment = $this->getOption( 'comment', 'Importing file' );
	}
	$commentExt = $this->getOption( 'comment-ext' );
	$summary = $this->getOption( 'summary', '' );
	$license = $this->getOption( 'license', '' );
	$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

	// Tag the uploads as server-side uploads only if that software tag is enabled
	$tags = in_array( ChangeTags::TAG_SERVER_SIDE_UPLOAD, ChangeTags::getSoftwareTags(), true )
		? [ ChangeTags::TAG_SERVER_SIDE_UPLOAD ]
		: [];

	# Batch "upload" operation
	$restrictionStore = $services->getRestrictionStore();
	foreach ( $files as $file ) {
		if ( $sleep && ( $processed > 0 ) ) {
			sleep( $sleep );
		}

		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !$title ) {
			$this->output(
				"{$base} could not be imported; a valid title cannot be produced\n"
			);
			continue;
		}

		if ( $from ) {
			if ( $from !== $title->getDBkey() ) {
				$statistics['ignored']++;
				continue;
			}
			// Found the requested file, continue from here
			$from = null;
		}

		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
				$this->output(
					"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
				);
				$statistics['skipped']++;
				continue;
			}
		}

		# Check existence
		$image = $services->getRepoGroup()->getLocalRepo()
			->newFile( $title );
		if ( $image->exists() ) {
			if ( $this->hasOption( 'overwrite' ) ) {
				$this->output( "{$base} exists, overwriting..." );
				$svar = 'overwritten';
			} else {
				$this->output( "{$base} exists, skipping\n" );
				$statistics['skipped']++;
				continue;
			}
		} else {
			if ( $this->hasOption( 'skip-dupes' ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );
				$dupes = $repo->findBySha1( $sha1 );
				if ( $dupes ) {
					$this->output(
						"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
					);
					$statistics['skipped']++;
					continue;
				}
			}

			$this->output( "Importing {$base}..." );
			$svar = 'added';
		}

		if ( $sourceWikiUrl ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
			$commentText = $real_comment !== false ? $real_comment : $comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
			if ( $real_user !== false ) {
				$realUser = User::newFromName( $real_user );
				if ( !$realUser ) {
					# user does not exist in target wiki
					// Terminate the "Importing ..." line and account for the failure
					$this->output(
						"failed: user '$real_user' does not exist in target wiki.\n"
					);
					$statistics['failed']++;
					continue;
				}
				StubGlobalUser::setUser( $realUser );
				$user = $realUser;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = $this->findAuxFile( $file, $commentExt );
				if ( !$f ) {
					$this->output( " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment." );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						$this->output(
							" Failed to load comment file {$f}, using default comment."
						);
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $this->hasOption( 'dry' ) ) {
			$this->output(
				" publishing {$file} by '{$user->getName()}', comment '$commentText'..."
			);
		} else {
			$mwProps = new MWFileProps( $services->getMimeAnalyzer() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getContentHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				$this->output( "failed. (" .
					$archive->getMessage( false, false, 'en' )->text() .
					")\n" );
				$statistics['failed']++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		if ( !$this->hasOption( 'summary' ) ) {
			$summary = $commentText;
		}

		// In dry mode $archive and $props are never assigned; the first branch
		// short-circuits before they would be read.
		if ( $this->hasOption( 'dry' ) ) {
			$this->output( "done.\n" );
		} elseif ( $image->recordUpload3(
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
			$archive->value,
			$summary,
			$commentText,
			$user,
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable,PhanPossiblyUndeclaredVariable
			$props,
			$timestamp,
			$tags
		)->isOK() ) {
			$this->output( "done.\n" );

			$doProtect = false;

			$protectLevel = $this->getOption( 'protect' );
			$restrictionLevels = $this->getConfig()->get( MainConfigNames::RestrictionLevels );

			if ( $protectLevel && in_array( $protectLevel, $restrictionLevels, true ) ) {
				$doProtect = true;
			}
			if ( $this->hasOption( 'unprotect' ) ) {
				// An empty level removes the restriction
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				$this->output( "\nWaiting for replica DBs...\n" );
				// Wait for replica DBs.
				sleep( 2 ); # Why this sleep?
				$this->waitForReplication();

				$this->output( "\nSetting image restrictions ..." );

				$cascade = false;
				$restrictions = [];
				foreach ( $restrictionStore->listApplicableRestrictionTypes( $title ) as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = $services->getWikiPageFactory()->newFromTitle( $title );
				$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
				$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
			}
		} else {
			$this->output( "failed. (at recordUpload stage)\n" );
			$svar = 'failed';
		}

		$statistics[$svar]++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}
	}

	# Print out some statistics
	$this->output( "\n" );
	foreach ( array_merge(
		[
			'Found' => count( $files ),
			'Limit' => $limit,
		],
		$statistics
	) as $desc => $number ) {
		if ( $number > 0 ) {
			$this->output( ucfirst( $desc ) . ": $number\n" );
		}
	}
}
421 
/**
 * Collect the files in a directory whose extension is in the allowed list.
 *
 * @param string $dir Directory to scan
 * @param string[] $exts Allowed extensions; matched against the lower-cased
 *  extension of each file name
 * @param bool $recurse Whether to descend into subdirectories
 * @return string[] Paths of the form "$dir/$entry"; empty when $dir is not a
 *  directory or cannot be opened
 */
private function findFiles( $dir, $exts, $recurse = false ) {
	$dhl = is_dir( $dir ) ? opendir( $dir ) : false;
	if ( !$dhl ) {
		return [];
	}

	$files = [];
	try {
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = pathinfo( $file, PATHINFO_EXTENSION );
				if ( in_array( strtolower( $ext ), $exts, true ) ) {
					$files[] = $path;
				}
			} elseif ( $recurse && is_dir( $path ) && $file !== '..' && $file !== '.' ) {
				// "." and ".." are skipped so the recursion cannot loop on itself
				$files = array_merge( $files, $this->findFiles( $path, $exts, true ) );
			}
		}
	} finally {
		// Release the handle; otherwise one stays open per directory level
		closedir( $dhl );
	}

	return $files;
}
450 
/**
 * Locate a companion file that sits next to $file and carries the given
 * extension. The full file name is tried first; after each miss the last
 * ".suffix" is removed from the name and the lookup is repeated.
 *
 * @param string $file Path of the file being imported
 * @param string $auxExtension Extension of the companion file, with or
 *  without the leading dot
 * @param int $maxStrip How many trailing suffixes may be stripped at most
 * @return string|false Path of the companion file, or false if none exists
 */
private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
	$suffix = str_starts_with( $auxExtension, '.' ) ? $auxExtension : '.' . $auxExtension;
	$directory = dirname( $file );
	$stem = basename( $file );

	for ( ; $maxStrip >= 0; $maxStrip -= 1 ) {
		$candidate = $directory . '/' . $stem . $suffix;
		if ( file_exists( $candidate ) ) {
			return $candidate;
		}

		$dot = strrpos( $stem, '.' );
		if ( $dot === false || $dot === 0 ) {
			// Nothing left to strip (no dot, or only a leading dot)
			return false;
		}

		$stem = substr( $stem, 0, $dot );
	}

	return false;
}
491 
/**
 * Fetch the upload comment of a file from the source wiki's API.
 *
 * @param string $wiki_host Base URL of the source wiki's script path
 * @param string $file File name, without the "File:" prefix
 * @return string|false The comment, or false if it could not be obtained
 */
private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
	return $this->fetchSourceWikiImageInfo( $wiki_host, $file, 'comment', __METHOD__ );
}

/**
 * Fetch the uploader's user name of a file from the source wiki's API.
 *
 * @param string $wiki_host Base URL of the source wiki's script path
 * @param string $file File name, without the "File:" prefix
 * @return string|false The user name, or false if it could not be obtained
 */
private function getFileUserFromSourceWiki( $wiki_host, $file ) {
	return $this->fetchSourceWikiImageInfo( $wiki_host, $file, 'user', __METHOD__ );
}

/**
 * Request a single imageinfo property in XML format and extract its value.
 *
 * @param string $wiki_host Base URL of the source wiki's script path
 * @param string $file File name, without the "File:" prefix
 * @param string $prop imageinfo property to request ('comment' or 'user')
 * @param string $caller Name of the calling method, passed on to the HTTP request
 * @return string|false Entity-decoded attribute value, or false when the
 *  request failed or the response did not contain the attribute
 */
private function fetchSourceWikiImageInfo( $wiki_host, $file, $prop, $caller ) {
	// Drop a trailing slash so a base URL like ".../w/" does not yield "//api.php"
	$url = rtrim( $wiki_host, '/' ) . '/api.php?action=query&format=xml&titles=File:'
		. rawurlencode( $file ) . '&prop=imageinfo&iiprop=' . $prop;
	$body = MediaWikiServices::getInstance()->getHttpRequestFactory()->get( $url, [], $caller );
	if ( !is_string( $body ) ) {
		// The request failed; do not hand a non-string to preg_match()
		return false;
	}

	// Only matches an <ii> element whose sole attribute is the requested property
	if ( preg_match( '#<ii ' . $prop . '="([^"]*)" />#', $body, $matches ) !== 1 ) {
		return false;
	}

	return html_entity_decode( $matches[1] );
}
522 
523 }
524 
525 $maintClass = ImportImages::class;
526 require_once RUN_MAINTENANCE_IF_MAIN;
const NS_FILE
Definition: Defines.php:70
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
$matches
const TAG_SERVER_SIDE_UPLOAD
This tagged edit was performed while importing media files using the importImages....
Definition: ChangeTags.php:93
static getSoftwareTags( $all=false)
Loads defined core tags, checks for invalid types (if not array), and filters for supported and enabl...
Definition: ChangeTags.php:160
static getSha1Base36FromPath( $path)
Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case encoding,...
Definition: FSFile.php:225
execute()
Do the actual work.
__construct()
Default constructor.
MimeMagic helper wrapper.
Definition: MWFileProps.php:28
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: Maintenance.php:66
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static getHandler( $type)
Get a MediaHandler for a given MIME type from the instance cache.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Stub object for the global user ($wgUser) that makes it possible to change the relevant underlying ob...
static getInitialPageText( $comment='', $license='', $copyStatus='', $source='', Config $config=null)
Get the initial image page text based on a comment and optional file status information.
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:667
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:70
static newFromName( $name, $validate='valid')
Definition: User.php:591
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition: User.php:799
const MAINTENANCE_SCRIPT_USER
Username used for various maintenance scripts.
Definition: User.php:116
$maintClass
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
if(!is_readable( $file)) $ext
Definition: router.php:48