MediaWiki  master
ForeignAPIRepo.php
Go to the documentation of this file.
1 <?php
25 
41 class ForeignAPIRepo extends FileRepo {
/**
 * This version string is used in the user agent for requests and helps
 * server maintainers identify ForeignAPI usage.
 * Update the version every time you make breaking or significant changes.
 */
const VERSION = '2.1';

/**
 * List of iiprop values for the thumbnail fetch queries.
 */
protected static $imageInfoProps = [
	'url',
	'timestamp',
];

/**
 * Callable pair pointing at ForeignAPIFile::newFromTitle.
 * NOTE(review): presumably consumed by the parent FileRepo when it builds
 * file objects; the consumer is not visible here.
 */
protected $fileFactory = [ ForeignAPIFile::class, 'newFromTitle' ];

/** Check back with Commons after this expiry (seconds): 1 day */
protected $apiThumbCacheExpiry = 24 * 3600;

/** Redownload thumbnail files after this expiry (seconds): 1 month, counted as 30 days */
protected $fileCacheExpiry = 30 * 24 * 3600;

/** Map of queried title => bool existence, filled in by fileExistsBatch() */
protected $mFileExists = [];

/** Value of $info['apibase'] given to the constructor, or null when absent */
private $mApiBase;
68 
/**
 * Set up the repo from its configuration array.
 *
 * On top of whatever the parent constructor consumes, this reads the
 * 'apibase', 'apiThumbCacheExpiry' and 'fileCacheExpiry' keys.
 *
 * @param array $info Repo configuration
 */
function __construct( $info ) {
	global $wgLocalFileRepo;

	parent::__construct( $info );

	// e.g. https://commons.wikimedia.org/w/api.php
	$this->mApiBase = $info['apibase'] ?? null;

	// Optional overrides of the default expiry times
	foreach ( [ 'apiThumbCacheExpiry', 'fileCacheExpiry' ] as $option ) {
		if ( isset( $info[$option] ) ) {
			$this->$option = $info[$option];
		}
	}

	if ( !$this->scriptDirUrl ) {
		// hack for description fetches
		$this->scriptDirUrl = dirname( $this->mApiBase );
	}

	// If we can cache thumbs we can guess sane defaults for these
	if ( $this->canCacheThumbs() ) {
		if ( !$this->url ) {
			$this->url = $wgLocalFileRepo['url'];
		}
		// Relies on $this->url possibly having just been defaulted above
		if ( !$this->thumbUrl ) {
			$this->thumbUrl = $this->url . '/thumb';
		}
	}
}
97 
/**
 * Get the API base URL this repo was configured with.
 *
 * @return string|null The 'apibase' setting, or null if none was given
 */
function getApiUrl() {
	$apiBase = $this->mApiBase;

	return $apiBase;
}
105 
/**
 * Per docs in FileRepo, this needs to return false if we don't support versioned
 * files. Only the current version is offered: any truthy $time is refused.
 *
 * @param Title|string $title Passed through to the parent implementation
 * @param string|bool $time Requested timestamp; must be falsy
 * @return File|bool False when a specific time was requested, otherwise
 *  whatever FileRepo::newFile() returns
 */
function newFile( $title, $time = false ) {
	return $time ? false : parent::newFile( $title, $time );
}
121 
/**
 * Check the existence of several files with a single API query.
 *
 * Titles already present in the per-instance cache are answered from it,
 * and virtual URLs and mwstore:// paths are reported as missing without
 * asking the remote side. Whatever remains is looked up with one
 * prop=imageinfo query.
 *
 * @param string[] $files Titles to check; keys are preserved in the result
 * @return bool[] Map of input key => existence. Entries for remotely queried
 *  titles are only present when the API response contained a page list.
 */
function fileExistsBatch( array $files ) {
	$results = [];
	foreach ( $files as $k => $f ) {
		if ( isset( $this->mFileExists[$f] ) ) {
			$results[$k] = $this->mFileExists[$f];
			unset( $files[$k] );
		} elseif ( self::isVirtualUrl( $f ) ) {
			# @todo FIXME: We need to be able to handle virtual
			# URLs better, at least when we know they refer to the
			# same repo.
			$results[$k] = false;
			unset( $files[$k] );
		} elseif ( FileBackend::isStoragePath( $f ) ) {
			$results[$k] = false;
			unset( $files[$k] );
			wfWarn( "Got mwstore:// path '$f'." );
		}
	}

	if ( !$files ) {
		// Everything was answered locally; do not send a query with an
		// empty title list to the remote wiki.
		return $results;
	}

	$data = $this->fetchImageQuery( [
		'titles' => implode( '|', $files ),
		'prop' => 'imageinfo',
	] );

	if ( isset( $data['query']['pages'] ) ) {
		# First, get results from the query. Note we only care whether the image exists,
		# not whether it has a description page.
		foreach ( $data['query']['pages'] as $p ) {
			if ( !isset( $p['title'] ) ) {
				continue;
			}
			// An entry without 'imagerepository' is treated as a missing file
			$this->mFileExists[$p['title']] = ( ( $p['imagerepository'] ?? '' ) !== '' );
		}
		# Second, copy the results to any redirects that were queried
		if ( isset( $data['query']['redirects'] ) ) {
			foreach ( $data['query']['redirects'] as $r ) {
				$this->mFileExists[$r['from']] = $this->mFileExists[$r['to']] ?? false;
			}
		}
		# Third, copy the results to any non-normalized titles that were queried
		if ( isset( $data['query']['normalized'] ) ) {
			foreach ( $data['query']['normalized'] as $n ) {
				$this->mFileExists[$n['from']] = $this->mFileExists[$n['to']] ?? false;
			}
		}
		# Finally, copy the results to the output; a title the response did
		# not mention at all counts as missing
		foreach ( $files as $key => $file ) {
			$results[$key] = $this->mFileExists[$file] ?? false;
		}
	}

	return $results;
}
176 
/**
 * File properties are not provided by this repo; the result is always empty.
 *
 * @param string $virtualUrl Ignored
 * @return array Always an empty array
 */
function getFileProps( $virtualUrl ) {
	$noProps = [];

	return $noProps;
}
184 
/**
 * Run an action=query request against the foreign API and decode the reply.
 *
 * 'format', 'action' and 'redirects' are always forced and override any
 * value the caller supplied. 'uselang' defaults to the site language when
 * it is unset or null.
 *
 * @param array $query Extra query parameters
 * @return array|null Decoded JSON response, or null when nothing was fetched
 */
function fetchImageQuery( $query ) {
	global $wgLanguageCode;

	$forced = [
		'format' => 'json',
		'action' => 'query',
		'redirects' => 'true'
	];
	$query = array_merge( $query, $forced );

	// uselang falls back to the site language when unset or null
	$query['uselang'] = $query['uselang'] ?? $wgLanguageCode;

	$data = $this->httpGetCached( 'Metadata', $query );

	return $data ? FormatJson::decode( $data, true ) : null;
}
211 
/**
 * Pick the image info of the first page in an API response that has any.
 *
 * @param array|null $data Decoded response as returned by fetchImageQuery()
 * @return array|bool First 'imageinfo' entry of the first page carrying one,
 *  with the page's 'pageid' added when the page has one; false if no page
 *  has image info
 */
function getImageInfo( $data ) {
	if ( !$data || !isset( $data['query']['pages'] ) ) {
		return false;
	}

	foreach ( $data['query']['pages'] as $page ) {
		if ( !isset( $page['imageinfo'][0] ) ) {
			continue;
		}

		$imageInfo = $page['imageinfo'][0];
		if ( isset( $page['pageid'] ) ) {
			$imageInfo['pageid'] = $page['pageid'];
		}

		return $imageInfo;
	}

	return false;
}
231 
/**
 * Find remote files by content hash, using a list=allimages query.
 *
 * @param string $hash Hash sent as the 'aisha1base36' parameter
 * @return ForeignAPIFile[] One file object per returned image that has a name
 */
function findBySha1( $hash ) {
	$results = $this->fetchImageQuery( [
		'aisha1base36' => $hash,
		'aiprop' => ForeignAPIFile::getProps(),
		'list' => 'allimages',
	] );

	if ( !isset( $results['query']['allimages'] ) ) {
		return [];
	}

	$files = [];
	foreach ( $results['query']['allimages'] as $img ) {
		// 1.14 was broken, doesn't return name attribute
		if ( isset( $img['name'] ) ) {
			$title = Title::makeTitle( NS_FILE, $img['name'] );
			$files[] = new ForeignAPIFile( $title, $this, $img );
		}
	}

	return $files;
}
255 
/**
 * Ask the foreign API for the URL of a thumbnail.
 *
 * @param string $name File name without the "File:" prefix
 * @param int $width Sent as iiurlwidth
 * @param int $height Sent as iiurlheight
 * @param array|null &$result Receives the whole image info array on success;
 *  left untouched on failure
 * @param string $otherParams Sent as iiurlparam
 * @return string|bool Remote thumbnail URL, or false if none was returned
 */
function getThumbUrl( $name, $width = -1, $height = -1, &$result = null, $otherParams = '' ) {
	$query = [
		'titles' => 'File:' . $name,
		'iiprop' => self::getIIProps(),
		'iiurlwidth' => $width,
		'iiurlheight' => $height,
		'iiurlparam' => $otherParams,
		'prop' => 'imageinfo'
	];
	$data = $this->fetchImageQuery( $query );
	$info = $this->getImageInfo( $data );

	if ( !$data || !$info || !isset( $info['thumburl'] ) ) {
		return false;
	}

	wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] . "\n" );
	$result = $info;

	return $info['thumburl'];
}
284 
/**
 * Ask the foreign API why a thumbnail could not be produced.
 *
 * @param string $name File name without the "File:" prefix
 * @param int $width Sent as iiurlwidth
 * @param int $height Sent as iiurlheight
 * @param string $otherParams Sent as iiurlparam
 * @param string|null $lang Sent as uselang; null lets fetchImageQuery()
 *  fall back to the site language
 * @return MediaTransformError|bool Error object wrapping the remote message,
 *  or false if the response carries no 'thumberror'
 */
function getThumbError( $name, $width = -1, $height = -1, $otherParams = '', $lang = null ) {
	$query = [
		'titles' => 'File:' . $name,
		'iiprop' => self::getIIProps(),
		'iiurlwidth' => $width,
		'iiurlheight' => $height,
		'iiurlparam' => $otherParams,
		'prop' => 'imageinfo',
		'uselang' => $lang,
	];
	$data = $this->fetchImageQuery( $query );
	$info = $this->getImageInfo( $data );

	if ( !$data || !$info || !isset( $info['thumberror'] ) ) {
		return false;
	}

	wfDebug( __METHOD__ . " got remote thumb error " . $info['thumberror'] . "\n" );

	// The message was already parsed by the foreign repo
	$remoteMessage = $info['thumberror'];

	return new MediaTransformError(
		'thumbnail_error_remote',
		$width,
		$height,
		$this->getDisplayName(),
		$remoteMessage
	);
}
320 
/**
 * Return the imageurl from cache if possible.
 *
 * Known thumbnail URLs are kept in the WAN cache, keyed per file name and
 * indexed by "width:height:params". On a miss the remote thumbnail URL is
 * fetched, the thumbnail is downloaded into the local 'thumb' zone, and the
 * resulting local URL is remembered.
 *
 * @param string $name File name
 * @param int $width
 * @param int $height
 * @param string $params Other rendering parameters, passed to getThumbUrl()
 * @return string|bool Local thumbnail URL on success. The foreign URL is
 *  returned when thumbnail caching is disabled or the local write fails.
 *  False when no remote URL is found, the deduced file name is unsafe or
 *  the download fails.
 */
function getThumbUrlFromCache( $name, $width, $height, $params = "" ) {
	// We can't check the local cache using FileRepo functions because
	// we override fileExistsBatch(). We have to use the FileBackend directly.
	$backend = $this->getBackend(); // convenience

	if ( !$this->canCacheThumbs() ) {
		$result = null; // can't pass "null" by reference, but it's ok as default value
		return $this->getThumbUrl( $name, $width, $height, $result, $params );
	}
	$key = $this->getLocalCacheKey( 'ForeignAPIRepo', 'ThumbUrl', $name );
	$sizekey = "$width:$height:$params";

	/* Get the array of urls that we already know */
	$knownThumbUrls = $this->wanCache->get( $key );
	if ( !$knownThumbUrls ) {
		/* No knownThumbUrls for this file */
		$knownThumbUrls = [];
	} elseif ( isset( $knownThumbUrls[$sizekey] ) ) {
		wfDebug( __METHOD__ . ': Got thumburl from local cache: ' .
			"{$knownThumbUrls[$sizekey]} \n" );

		return $knownThumbUrls[$sizekey];
	}

	$metadata = null;
	$foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata, $params );

	if ( !$foreignUrl ) {
		wfDebug( __METHOD__ . " Could not find thumburl\n" );

		return false;
	}

	// We need the same filename as the remote one :)
	$fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
	if ( !$this->validateFilename( $fileName ) ) {
		wfDebug( __METHOD__ . " The deduced filename $fileName is not safe\n" );

		return false;
	}
	$localPath = $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
	$localFilename = $localPath . "/" . $fileName;
	$localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) .
		rawurlencode( $name ) . "/" . rawurlencode( $fileName );

	if ( $backend->fileExists( [ 'src' => $localFilename ] )
		&& isset( $metadata['timestamp'] )
	) {
		wfDebug( __METHOD__ . " Thumbnail was already downloaded before\n" );
		// NOTE(review): the arithmetic below assumes getFileTimestamp() yields
		// a Unix timestamp comparable with time() and strtotime() - confirm.
		$modified = $backend->getFileTimestamp( [ 'src' => $localFilename ] );
		$remoteModified = strtotime( $metadata['timestamp'] );
		$current = time();
		$diff = abs( $modified - $current );
		if ( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
			/* Use our current and already downloaded thumbnail */
			$knownThumbUrls[$sizekey] = $localUrl;
			$this->wanCache->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );

			return $localUrl;
		}
		/* There is a new Commons file, or existing thumbnail older than a month */
	}

	$thumb = self::httpGet( $foreignUrl, 'default', [], $mtime );
	if ( !$thumb ) {
		wfDebug( __METHOD__ . " Could not download thumb\n" );

		return false;
	}

	# @todo FIXME: Delete old thumbs that aren't being used. Maintenance script?
	$backend->prepare( [ 'dir' => dirname( $localFilename ) ] );
	// Kept separate from $params so the rendering parameters are not clobbered
	$createOp = [ 'dst' => $localFilename, 'content' => $thumb ];
	if ( !$backend->quickCreate( $createOp )->isOK() ) {
		wfDebug( __METHOD__ . " could not write to thumb path '$localFilename'\n" );

		return $foreignUrl;
	}
	$knownThumbUrls[$sizekey] = $localUrl;

	// Scale the TTL by the thumbnail's Last-Modified time when the server sent
	// one; otherwise use the plain expiry, as for an already downloaded thumb.
	$ttl = $mtime
		? $this->wanCache->adaptiveTTL( $mtime, $this->apiThumbCacheExpiry )
		: $this->apiThumbCacheExpiry;
	$this->wanCache->set( $key, $knownThumbUrls, $ttl );
	wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache \n" );

	return $localUrl;
}
422 
/**
 * Get the URL corresponding to a zone.
 *
 * 'public' and 'thumb' are served from this repo's own url/thumbUrl
 * settings; every other zone is delegated to the parent class.
 *
 * @param string $zone Zone name
 * @param string|null $ext Passed through to the parent for other zones
 * @return string|bool
 */
function getZoneUrl( $zone, $ext = null ) {
	// Loose comparison on purpose: it mirrors switch/case matching
	if ( $zone == 'public' ) {
		return $this->url;
	}
	if ( $zone == 'thumb' ) {
		return $this->thumbUrl;
	}

	return parent::getZoneUrl( $zone, $ext );
}
439 
/**
 * Get the local directory corresponding to one of the basic zones.
 *
 * Only the 'public' and 'thumb' zones are supported by this repo.
 *
 * @param string $zone Zone name
 * @return string|bool The parent's path for a supported zone, else false
 */
function getZonePath( $zone ) {
	if ( !in_array( $zone, [ 'public', 'thumb' ] ) ) {
		return false;
	}

	return parent::getZonePath( $zone );
}
453 
/**
 * Are we locally caching the thumbnails?
 *
 * @return bool True when the thumbnail cache expiry is greater than zero
 */
public function canCacheThumbs() {
	$expiry = $this->apiThumbCacheExpiry;

	return $expiry > 0;
}
461 
/**
 * The user agent the ForeignAPIRepo will use.
 *
 * @return string Standard user agent followed by " ForeignAPIRepo/<VERSION>"
 */
public static function getUserAgent() {
	$base = Http::userAgent();
	$suffix = " ForeignAPIRepo/" . self::VERSION;

	return $base . $suffix;
}
469 
/**
 * Get information about the repo - overrides/extends the parent class's information.
 *
 * Adds 'apiurl', and, when the remote siteinfo query succeeds, 'articlepath',
 * 'server' and (if reported) 'favicon' taken from the remote wiki's general
 * site information. The siteinfo response is cached for 7200 seconds.
 *
 * @return array
 */
function getInfo() {
	$info = parent::getInfo();
	$info['apiurl'] = $this->getApiUrl();

	$query = [
		'format' => 'json',
		'action' => 'query',
		'meta' => 'siteinfo',
		'siprop' => 'general',
	];

	$data = $this->httpGetCached( 'SiteInfo', $query, 7200 );
	if ( !$data ) {
		return $info;
	}

	$siteInfo = FormatJson::decode( $data, true );
	$general = $siteInfo['query']['general'] ?? null;
	if ( !is_array( $general ) ) {
		// Undecodable reply or one without general site information:
		// return what we have instead of dereferencing a missing structure.
		return $info;
	}

	$info['articlepath'] = $general['articlepath'] ?? null;
	$info['server'] = $general['server'] ?? null;

	if ( isset( $general['favicon'] ) ) {
		$info['favicon'] = $general['favicon'];
	}

	return $info;
}
503 
/**
 * Like a HttpRequestFactory::get request, but with custom User-Agent.
 *
 * @param string $url URL to fetch
 * @param string|int $timeout Request timeout; null is treated as 'default'
 * @param array $options Extra request options; 'timeout' and 'method' are
 *  always overwritten
 * @param int|bool &$mtime On success, receives the Unix timestamp from the
 *  Last-Modified response header, or false when the header is absent.
 *  Not modified on failure.
 * @return string|bool Response body, or false on failure (which is logged
 *  to the 'http' channel)
 */
public static function httpGet(
	$url, $timeout = 'default', $options = [], &$mtime = false
) {
	$options['timeout'] = $timeout ?? 'default';
	/* Http::get */
	// NOTE(review): this expansion sits on the source line missing from the
	// listing (519). It is restored from the file's references to
	// wfExpandUrl() and PROTO_HTTP - confirm against the upstream file.
	$url = wfExpandUrl( $url, PROTO_HTTP );
	wfDebug( "ForeignAPIRepo: HTTP GET: $url\n" );
	$options['method'] = "GET";

	$req = MWHttpRequest::factory( $url, $options, __METHOD__ );
	$req->setUserAgent( self::getUserAgent() );
	$status = $req->execute();

	if ( !$status->isOK() ) {
		// NOTE(review): LoggerFactory is assumed to be imported at file level;
		// the import is not visible in this listing.
		$logger = LoggerFactory::getInstance( 'http' );
		$logger->warning(
			$status->getWikiText( false, false, 'en' ),
			[ 'caller' => 'ForeignAPIRepo::httpGet' ]
		);

		return false;
	}

	$lmod = $req->getResponseHeader( 'Last-Modified' );
	$mtime = $lmod ? wfTimestamp( TS_UNIX, $lmod ) : false;

	return $req->getContent();
}
546 
/**
 * Build the iiprop parameter value for the thumbnail fetch queries.
 *
 * @return string The entries of self::$imageInfoProps joined with '|'
 */
protected static function getIIProps() {
	$props = self::$imageInfoProps;

	return implode( '|', $props );
}
554 
/**
 * HTTP GET request to a mediawiki API (with caching)
 *
 * The URL is built from the configured API base when there is one, and from
 * makeUrl( $query, 'api' ) otherwise. Failed requests are cached as null.
 *
 * @param string $target Label used as part of the cache key
 * @param array $query Query parameters
 * @param int $cacheTTL Time to live for the cached response, in seconds
 * @return string|null Response body, or null for a (possibly cached) failure
 */
public function httpGetCached( $target, $query, $cacheTTL = 3600 ) {
	$url = $this->mApiBase
		? wfAppendQuery( $this->mApiBase, $query )
		: $this->makeUrl( $query, 'api' );

	$cacheKey = $this->getLocalCacheKey( static::class, $target, md5( $url ) );

	$fetch = function ( $curValue, &$ttl ) use ( $url ) {
		$html = self::httpGet( $url, 'default', [], $mtime );

		if ( $html === false ) {
			// NOTE(review): $mtime carries no timestamp on failure; presumably
			// adaptiveTTL() then yields a short TTL - confirm against WANObjectCache.
			$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );

			return null; // caches negatives
		}

		if ( $mtime ) {
			$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );
		}

		return $html;
	};

	return $this->wanCache->getWithSetCallback(
		$cacheKey,
		$cacheTTL,
		$fetch,
		[ 'pcGroup' => 'http-get:3', 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
586 
/**
 * Enumerating files is not possible for this repo.
 *
 * @param callable $callback Unused
 * @throws MWException Always
 */
function enumFiles( $callback ) {
	$message = 'enumFiles is not supported by ' . static::class;

	throw new MWException( $message );
}
594 
/**
 * This repo does not support write operations, so the assertion always fails.
 *
 * @throws MWException Always
 */
protected function assertWritableRepo() {
	$message = static::class . ': write operations are not supported.';

	throw new MWException( $message );
}
601 }
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
getThumbUrlFromCache( $name, $width, $height, $params="")
Return the imageurl from cache if possible.
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
getZonePath( $zone)
Get the local directory corresponding to one of the basic zones.
httpGetCached( $target, $query, $cacheTTL=3600)
HTTP GET request to a mediawiki API (with caching)
getLocalCacheKey(... $args)
Get a key for this repo in the local cache domain.
Definition: FileRepo.php:1852
makeUrl( $query='', $entry='index')
Make an url to this repo.
Definition: FileRepo.php:750
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
if(!isset( $args[0])) $lang
string false $url
Public zone URL.
Definition: FileRepo.php:103
$wgLocalFileRepo
File repository structures.
enumFiles( $callback)
int $apiThumbCacheExpiry
Check back with Commons after this expiry.
getFileTimestamp(array $params)
Get the last-modified timestamp of the file at a storage path.
static isStoragePath( $path)
Check if a given path is a "mwstore://" path.
fileExistsBatch(array $files)
static userAgent()
A standard user-agent we can use for external requests.
Definition: Http.php:98
getBackend()
Get the file backend instance.
Definition: FileRepo.php:225
$wgLanguageCode
Site language code.
wfAppendQuery( $url, $query)
Append a query string to an existing URL, which may or may not already have query string parameters a...
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Foreign file accessible through api.php requests.
fetchImageQuery( $query)
quickCreate(array $params)
Performs a single quick create operation.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
static decode( $value, $assoc=false)
Decodes a JSON string.
Definition: FormatJson.php:174
string $name
Definition: FileRepo.php:149
static getProps()
Get the property string for iiprop and aiprop.
fileExists(array $params)
Check if a file exists at a storage path in the backend.
static $imageInfoProps
List of iiprop values for the thumbnail fetch queries.
string $thumbUrl
The base thumbnail URL.
Definition: FileRepo.php:106
static httpGet( $url, $timeout='default', $options=[], &$mtime=false)
Like a HttpRequestFactory::get request, but with custom User-Agent.
const NS_FILE
Definition: Defines.php:66
getThumbUrl( $name, $width=-1, $height=-1, &$result=null, $otherParams='')
const PROTO_HTTP
Definition: Defines.php:199
prepare(array $params)
Prepare a storage directory for usage.
getThumbError( $name, $width=-1, $height=-1, $otherParams='', $lang=null)
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:586
getInfo()
Get information about the repo - overrides/extends the parent class's information.
getDisplayName()
Get the human-readable name of the repo.
Definition: FileRepo.php:1795
if(!is_readable( $file)) $ext
Definition: router.php:48
Base class for file repositories.
Definition: FileRepo.php:39
getHashPath( $name)
Get a relative path including trailing slash, e.g.
Definition: FileRepo.php:689
A foreign repository with a remote MediaWiki with an API thingy.
FileBackend $backend
Definition: FileRepo.php:62
canCacheThumbs()
Are we locally caching the thumbnails?
validateFilename( $filename)
Determine if a relative path is valid, i.e.
Definition: FileRepo.php:1697
int $fileCacheExpiry
Redownload thumbnail files after this expiry.
getFileProps( $virtualUrl)
newFile( $title, $time=false)
Per docs in FileRepo, this needs to return false if we don't support versioned files.
static getUserAgent()
The user agent the ForeignAPIRepo will use.
Basic media transform error class.
getZoneUrl( $zone, $ext=null)