MediaWiki  master
importImages.php
Go to the documentation of this file.
1 <?php
35 require_once __DIR__ . '/Maintenance.php';
36 
38 
/**
 * Maintenance script that imports images and other media files from a local
 * directory into the wiki's local file repository.
 *
 * For every candidate file it builds a File-namespace title, optionally checks
 * for blocks/duplicates, publishes the file, records the upload and optionally
 * (un)protects the resulting page. Statistics are printed at the end.
 */
class ImportImages extends Maintenance {

	/**
	 * Register the script description, its single positional argument and all
	 * command line options.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Imports images and other media files into the wiki' );
		$this->addArg( 'dir', 'Path to the directory containing images to be imported' );

		$this->addOption( 'extensions',
			'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
			false,
			true
		);
		$this->addOption( 'overwrite',
			'Overwrite existing images with the same name (default is to skip them)' );
		$this->addOption( 'limit',
			'Limit the number of images to process. Ignored or skipped images are not counted',
			false,
			true
		);
		$this->addOption( 'from',
			"Ignore all files until the one with the given name. Useful for resuming aborted "
			. "imports. The name should be the file's canonical database form.",
			false,
			true
		);
		$this->addOption( 'skip-dupes',
			'Skip images that were already uploaded under a different name (check SHA1)' );
		$this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
		$this->addOption( 'sleep',
			'Sleep between files. Useful mostly for debugging',
			false,
			true
		);
		$this->addOption( 'user',
			"Set username of uploader, default 'Maintenance script'",
			false,
			true
		);
		// This parameter can optionally have an argument. If none specified, getOption()
		// returns 1 which is precisely what we need.
		$this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
		$this->addOption( 'comment',
			"Set file description, default 'Importing file'",
			false,
			true
		);
		$this->addOption( 'comment-file',
			'Set description to the content of this file',
			false,
			true
		);
		$this->addOption( 'comment-ext',
			'Causes the description for each file to be loaded from a file with the same name, but '
			. 'the extension provided. If a global description is also given, it is appended.',
			false,
			true
		);
		$this->addOption( 'summary',
			'Upload summary, description will be used if not provided',
			false,
			true
		);
		$this->addOption( 'license',
			'Use an optional license template',
			false,
			true
		);
		$this->addOption( 'timestamp',
			'Override upload time/date, all MediaWiki timestamp formats are accepted',
			false,
			true
		);
		$this->addOption( 'protect',
			'Specify the protect value (autoconfirmed,sysop)',
			false,
			true
		);
		$this->addOption( 'unprotect', 'Unprotects all uploaded images' );
		$this->addOption( 'source-wiki-url',
			'If specified, take User and Comment data for each imported file from this URL. '
			. 'For example, --source-wiki-url="http://en.wikipedia.org/"',
			false,
			true
		);
		$this->addOption( 'dry', "Dry run, don't import anything" );
	}

	/**
	 * Run the import: collect candidate files from the given directory and
	 * process them one by one, then print a summary of what happened.
	 *
	 * Sets the global $wgUser to the uploading user as a side effect.
	 */
	public function execute() {
		global $wgFileExtensions, $wgUser, $wgRestrictionLevels;

		$permissionManager = \MediaWiki\MediaWikiServices::getInstance()->getPermissionManager();

		// Files that went through the whole pipeline; drives --sleep, --limit and
		// the --check-userblock interval. Not part of the printed statistics.
		$processed = 0;
		$stats = [
			'ignored' => 0,
			'added' => 0,
			'skipped' => 0,
			'overwritten' => 0,
			'failed' => 0,
		];

		$this->output( "Importing Files\n\n" );

		$dir = $this->getArg( 0 );

		# Check Protection
		if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
			$this->fatalError( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
		}

		// Abort only when --protect was passed WITHOUT a usable value.
		if ( $this->hasOption( 'protect' ) && trim( (string)$this->getOption( 'protect' ) ) === '' ) {
			$this->fatalError( "You must specify a protection option.\n" );
		}

		# Prepare the list of allowed extensions
		$extensions = $this->hasOption( 'extensions' )
			// Trim so that "png, jpg" behaves the same as "png,jpg"
			? array_map( 'trim', explode( ',', strtolower( $this->getOption( 'extensions' ) ) ) )
			: $wgFileExtensions;

		# Search the path provided for candidates for import
		$files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );

		# Initialise the user for this operation
		$user = $this->hasOption( 'user' )
			? User::newFromName( $this->getOption( 'user' ) )
			: User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
		if ( !$user instanceof User ) {
			// Fall back to the system user when the requested name could not be used
			$user = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
		}
		$wgUser = $user;

		# Get block check. If a value is given, this specified how often the check is performed
		$checkUserBlock = (int)$this->getOption( 'check-userblock' );

		$from = $this->getOption( 'from' );
		$sleep = (int)$this->getOption( 'sleep' );
		$limit = (int)$this->getOption( 'limit' );
		$timestamp = $this->getOption( 'timestamp', false );

		# Get the upload comment. Provide a default one in case there's no comment given.
		$commentFile = $this->getOption( 'comment-file' );
		if ( $commentFile !== null ) {
			$comment = file_get_contents( $commentFile );
			if ( $comment === false || $comment === null ) {
				$this->fatalError( "failed to read comment file: {$commentFile}\n" );
			}
		} else {
			$comment = $this->getOption( 'comment', 'Importing file' );
		}
		$commentExt = $this->getOption( 'comment-ext' );
		$summary = $this->getOption( 'summary', '' );

		$license = $this->getOption( 'license', '' );

		$sourceWikiUrl = $this->getOption( 'source-wiki-url' );

		// These flags cannot change while the script runs, so read them once.
		$dryRun = $this->hasOption( 'dry' );
		$overwrite = $this->hasOption( 'overwrite' );
		$skipDupes = $this->hasOption( 'skip-dupes' );
		$hasSummary = $this->hasOption( 'summary' );

		# Batch "upload" operation
		$count = count( $files );
		if ( $count === 0 ) {
			$this->output( "No suitable files could be found for import.\n" );
			return;
		}

		foreach ( $files as $file ) {
			if ( $sleep && ( $processed > 0 ) ) {
				sleep( $sleep );
			}

			$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

			# Validate a title
			$title = Title::makeTitleSafe( NS_FILE, $base );
			if ( !is_object( $title ) ) {
				$this->output(
					"{$base} could not be imported; a valid title cannot be produced\n"
				);
				continue;
			}

			// --from: ignore everything until the named file is reached, then
			// clear the marker so that it and all later files are processed.
			if ( $from ) {
				if ( $from === $title->getDBkey() ) {
					$from = null;
				} else {
					$stats['ignored']++;
					continue;
				}
			}

			if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
				$user->clearInstanceCache( 'name' ); // reload from DB!
				if ( $permissionManager->isBlockedFrom( $user, $title ) ) {
					$this->output(
						"{$user->getName()} is blocked from {$title->getPrefixedText()}! skipping.\n"
					);
					$stats['skipped']++;
					continue;
				}
			}

			# Check existence
			$image = \MediaWiki\MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo()
				->newFile( $title );
			if ( $image->exists() ) {
				if ( !$overwrite ) {
					$this->output( "{$base} exists, skipping\n" );
					$stats['skipped']++;
					continue;
				}

				$this->output( "{$base} exists, overwriting..." );
				$outcome = 'overwritten';
			} else {
				if ( $skipDupes ) {
					$repo = $image->getRepo();
					# XXX: we end up calculating this again when actually uploading. that sucks.
					$sha1 = FSFile::getSha1Base36FromPath( $file );

					$dupes = $repo->findBySha1( $sha1 );

					if ( $dupes ) {
						$this->output(
							"{$base} already exists as {$dupes[0]->getName()}, skipping\n"
						);
						$stats['skipped']++;
						continue;
					}
				}

				$this->output( "Importing {$base}..." );
				$outcome = 'added';
			}

			if ( $sourceWikiUrl ) {
				/* find comment text directly from source wiki, through MW's API */
				$realComment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
				$commentText = ( $realComment === false ) ? $comment : $realComment;

				/* find user directly from source wiki, through MW's API */
				$realUser = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
				if ( $realUser === false ) {
					$wgUser = $user;
				} else {
					$sourceUser = User::newFromName( $realUser );
					if ( $sourceUser === false ) {
						# user does not exist in target wiki
						// Terminate the "Importing ..." line and account for the file,
						// and never leave $wgUser set to false.
						$this->output(
							"failed: user '$realUser' does not exist in target wiki.\n"
						);
						$stats['failed']++;
						continue;
					}
					$wgUser = $sourceUser;
				}
			} else {
				# Find comment text
				$commentText = false;

				if ( $commentExt ) {
					$f = $this->findAuxFile( $file, $commentExt );
					if ( !$f ) {
						$this->output( " No comment file with extension {$commentExt} found "
							. "for {$file}, using default comment. " );
					} else {
						$commentText = file_get_contents( $f );
						if ( !$commentText ) {
							$this->output(
								" Failed to load comment file {$f}, using default comment. "
							);
						}
					}
				}

				if ( !$commentText ) {
					$commentText = $comment;
				}
			}

			# Import the file
			if ( $dryRun ) {
				$this->output(
					" publishing {$file} by '{$wgUser->getName()}', comment '$commentText'... "
				);
			} else {
				$mwProps = new MWFileProps(
					\MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer()
				);
				$props = $mwProps->getPropsFromPath( $file, true );
				$flags = 0;
				$publishOptions = [];
				$handler = MediaHandler::getHandler( $props['mime'] );
				if ( $handler ) {
					$metadata = \Wikimedia\AtEase\AtEase::quietCall( 'unserialize', $props['metadata'] );

					$publishOptions['headers'] = $handler->getContentHeaders( $metadata );
				} else {
					$publishOptions['headers'] = [];
				}
				$archive = $image->publish( $file, $flags, $publishOptions );
				if ( !$archive->isGood() ) {
					$this->output( "failed. (" .
						$archive->getMessage( false, false, 'en' )->text() .
						")\n" );
					$stats['failed']++;
					continue;
				}
			}

			$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
			if ( !$hasSummary ) {
				$summary = $commentText;
			}

			if ( $dryRun ) {
				$this->output( "done.\n" );
			} elseif ( $image->recordUpload2(
				$archive->value,
				$summary,
				$commentText,
				$props,
				$timestamp
			)->isOK() ) {
				$this->output( "done.\n" );

				$doProtect = false;

				$protectLevel = $this->getOption( 'protect' );

				if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels, true ) ) {
					$doProtect = true;
				}
				if ( $this->hasOption( 'unprotect' ) ) {
					// An empty level removes the restriction
					$protectLevel = '';
					$doProtect = true;
				}

				if ( $doProtect ) {
					$this->applyProtection( $title, $protectLevel, $user );
				}
			} else {
				$this->output( "failed. (at recordUpload stage)\n" );
				$outcome = 'failed';
			}

			$stats[$outcome]++;
			$processed++;

			if ( $limit && $processed >= $limit ) {
				break;
			}
		}

		# Print out some statistics
		$this->output( "\n" );
		$report = [
			'Found' => $count,
			'Limit' => $limit,
			'Ignored' => $stats['ignored'],
			'Added' => $stats['added'],
			'Skipped' => $stats['skipped'],
			'Overwritten' => $stats['overwritten'],
			'Failed' => $stats['failed'],
		];
		foreach ( $report as $desc => $value ) {
			if ( $value > 0 ) {
				$this->output( "{$desc}: {$value}\n" );
			}
		}
	}

	/**
	 * Set the same restriction level on every restriction type of a page.
	 *
	 * @param Title $title Page to update
	 * @param string $protectLevel Level to set; an empty string is used by the
	 *   caller for --unprotect
	 * @param User $user User performing the change
	 */
	private function applyProtection( $title, $protectLevel, $user ) {
		# Protect the file
		$this->output( "\nWaiting for replica DBs...\n" );
		// Wait for replica DBs.
		sleep( 2 ); # Why this sleep?
		wfWaitForSlaves();

		$this->output( "\nSetting image restrictions ... " );

		// Kept in a variable because it is handed to doUpdateRestrictions() below
		$cascade = false;
		$restrictions = [];
		foreach ( $title->getRestrictionTypes() as $type ) {
			$restrictions[$type] = $protectLevel;
		}

		$page = WikiPage::factory( $title );
		$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
		$this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
	}

	/**
	 * Search a directory for files with one of a set of extensions.
	 *
	 * @param string $dir Path to directory to search
	 * @param string[] $exts Allowed extensions; compared against the lower-cased
	 *   extension of each file
	 * @param bool $recurse Also descend into subdirectories
	 * @return string[] Paths of matching files; empty when $dir is not a
	 *   directory or cannot be opened
	 */
	private function findFiles( $dir, $exts, $recurse = false ) {
		if ( !is_dir( $dir ) ) {
			return [];
		}

		$dhl = opendir( $dir );
		if ( !$dhl ) {
			return [];
		}

		$files = [];
		while ( ( $file = readdir( $dhl ) ) !== false ) {
			$path = $dir . '/' . $file;
			if ( is_file( $path ) ) {
				$ext = strtolower( pathinfo( $file, PATHINFO_EXTENSION ) );
				if ( in_array( $ext, $exts, true ) ) {
					$files[] = $path;
				}
			} elseif ( $recurse && $file !== '.' && $file !== '..' && is_dir( $path ) ) {
				$files = array_merge( $files, $this->findFiles( $path, $exts, true ) );
			}
		}
		// Release the handle; recursion would otherwise keep one open per level
		closedir( $dhl );

		return $files;
	}

	/**
	 * Find an auxiliary file with the given extension, matching the given base
	 * file path.
	 *
	 * The extension is first appended to the full file name; if no such file
	 * exists, up to $maxStrip trailing ".suffix" parts are removed from the name
	 * and the lookup is retried after each removal.
	 *
	 * @param string $file Path of the base file
	 * @param string $auxExtension Extension to look for, with or without leading dot
	 * @param int $maxStrip Maximum number of suffixes to strip from the base name
	 * @return string|bool Path of the auxiliary file, or false if none was found
	 */
	private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
		if ( strpos( $auxExtension, '.' ) !== 0 ) {
			$auxExtension = '.' . $auxExtension;
		}

		$directory = dirname( $file );
		$name = basename( $file );

		for ( $remaining = $maxStrip; $remaining >= 0; $remaining-- ) {
			$candidate = $directory . '/' . $name . $auxExtension;
			if ( file_exists( $candidate ) ) {
				return $candidate;
			}

			// Stop when there is no dot left, or only a leading one (dotfile)
			$dotPos = strrpos( $name, '.' );
			if ( !$dotPos ) {
				break;
			}

			$name = substr( $name, 0, $dotPos );
		}

		return false;
	}

	/**
	 * Fetch one imageinfo property of a file from another wiki's api.php.
	 *
	 * @todo FIXME: Access the api in a saner way and performing just one query
	 * (preferably batching files too).
	 *
	 * @param string $wiki_host Base URL to which "/api.php" is appended
	 * @param string $file File name without namespace prefix
	 * @param string $prop imageinfo property to request (e.g. 'comment', 'user')
	 * @param string $caller Caller name passed on to Http::get()
	 * @return string|bool Entity-decoded attribute value, or false when the
	 *   request failed or the response carries no such attribute
	 */
	private function getSourceWikiImageInfoProp( $wiki_host, $file, $prop, $caller ) {
		$url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
			. rawurlencode( $file ) . '&prop=imageinfo&iiprop=' . rawurlencode( $prop );
		$body = Http::get( $url, [], $caller );
		if ( !is_string( $body ) ) {
			// Request failed; there is nothing to parse
			return false;
		}

		$pattern = '#<ii ' . preg_quote( $prop, '#' ) . '="([^"]*)" />#';
		if ( !preg_match( $pattern, $body, $matches ) ) {
			return false;
		}

		return html_entity_decode( $matches[1] );
	}

	/**
	 * Get the upload comment of a file from the source wiki.
	 *
	 * @param string $wiki_host Base URL of the source wiki
	 * @param string $file File name without namespace prefix
	 * @return string|bool Comment text, or false if it could not be determined
	 */
	private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
		return $this->getSourceWikiImageInfoProp( $wiki_host, $file, 'comment', __METHOD__ );
	}

	/**
	 * Get the name of the uploader of a file from the source wiki.
	 *
	 * @param string $wiki_host Base URL of the source wiki
	 * @param string $file File name without namespace prefix
	 * @return string|bool User name, or false if it could not be determined
	 */
	private function getFileUserFromSourceWiki( $wiki_host, $file ) {
		return $this->getSourceWikiImageInfoProp( $wiki_host, $file, 'user', __METHOD__ );
	}

}
517 
// Script entry point: name the maintenance class defined in this file, then
// include whatever file the RUN_MAINTENANCE_IF_MAIN constant points to.
// NOTE(review): presumably that file instantiates and runs $maintClass when this
// script is the one being executed — confirm in Maintenance.php.
518 $maintClass = ImportImages::class;
519 require_once RUN_MAINTENANCE_IF_MAIN;
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:142
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
wfBaseName( $path, $suffix='')
Return the final portion of a pathname.
getArg( $argId=0, $default=null)
Get an argument.
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
static getInitialPageText( $comment='', $license='', $copyStatus='', $source='', Config $config=null)
Get the initial image page text based on a comment and optional file status information.
getOption( $name, $default=null)
Get an option, or return the default.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:86
$wgRestrictionLevels
Rights which can be required for each protection level (via action=protect)
hasOption( $name)
Checks to see if a particular option exists.
A helper class for throttling authentication attempts.
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:51
$maintClass
getFileCommentFromSourceWiki( $wiki_host, $file)
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
addDescription( $text)
Set the description text.
addArg( $arg, $description, $required=true)
Add some args that are needed.
const NS_FILE
Definition: Defines.php:66
output( $out, $channel=null)
Throw some output to the user.
static getSha1Base36FromPath( $path)
Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case encoding, zero padded to 31 digits.
Definition: FSFile.php:225
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:612
$wgFileExtensions
This is the list of preferred extensions for uploading files.
if(!is_readable( $file)) $ext
Definition: router.php:48
MimeMagic helper wrapper.
Definition: MWFileProps.php:28
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
findFiles( $dir, $exts, $recurse=false)
Search a directory for files with one of a set of extensions.
findAuxFile( $file, $auxExtension, $maxStrip=1)
Find an auxiliary file with the given extension, matching the given base file path.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static newFromName( $name, $validate='valid')
Static factory method for creation from username.
Definition: User.php:515
getFileUserFromSourceWiki( $wiki_host, $file)
static getHandler( $type)
Get a MediaHandler for a given MIME type from the instance cache.
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition: User.php:737
static get( $url, array $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition: Http.php:64
$matches