MediaWiki  master
ForeignAPIRepo.php
Go to the documentation of this file.
1 <?php
25 
41 class ForeignAPIRepo extends FileRepo {
/**
 * Version string sent in the User-Agent of outgoing requests; it helps
 * server maintainers identify ForeignAPIRepo usage.
 * Update it every time you make breaking or significant changes.
 */
private const VERSION = "2.1";

/**
 * List of iiprop values for the thumbnail fetch queries.
 * Joined with '|' by getIIProps().
 */
protected static $imageInfoProps = [ 'url', 'timestamp' ];

/** Callback used to build file objects for this repo */
protected $fileFactory = [ ForeignAPIFile::class, 'newFromTitle' ];

/** @var int Check back with Commons after this expiry (seconds): 1 day */
protected $apiThumbCacheExpiry = 24 * 3600;

/** @var int Redownload thumbnail files after this expiry (seconds): 1 month */
protected $fileCacheExpiry = 30 * 24 * 3600;

/** @var array In-process map of queried title => existence flag, filled by fileExistsBatch() */
protected $mFileExists = [];

/** @var string|null Remote API endpoint, taken from the 'apibase' config key */
private $mApiBase;
68 
/**
 * @param array $info Repository configuration. Besides being passed to the
 *   parent constructor, the keys 'apibase', 'apiThumbCacheExpiry' and
 *   'fileCacheExpiry' are read here.
 */
public function __construct( $info ) {
	global $wgLocalFileRepo;

	parent::__construct( $info );

	// e.g. https://commons.wikimedia.org/w/api.php
	$this->mApiBase = $info['apibase'] ?? null;

	// Optional overrides for the default cache lifetimes
	foreach ( [ 'apiThumbCacheExpiry', 'fileCacheExpiry' ] as $option ) {
		if ( isset( $info[$option] ) ) {
			$this->$option = $info[$option];
		}
	}

	if ( !$this->scriptDirUrl ) {
		// hack for description fetches
		$this->scriptDirUrl = dirname( $this->mApiBase );
	}

	// If we can cache thumbs we can guess sane defaults for these
	if ( $this->canCacheThumbs() ) {
		if ( !$this->url ) {
			$this->url = $wgLocalFileRepo['url'];
		}
		if ( !$this->thumbUrl ) {
			$this->thumbUrl = $this->url . '/thumb';
		}
	}
}
97 
/**
 * @return string|null The API base URL this repo was configured with
 *   ('apibase'), or null when none was given
 */
private function getApiUrl() {
	$apiUrl = $this->mApiBase;

	return $apiUrl;
}
104 
/**
 * Per docs in FileRepo, this needs to return false if we don't support
 * versioned files, so any truthy $time is rejected.
 *
 * @param mixed $title Passed through to FileRepo::newFile()
 * @param string|bool $time Requested file version; false for the current one
 * @return mixed False when a specific version is requested, otherwise
 *   whatever the parent implementation returns
 */
public function newFile( $title, $time = false ) {
	return $time ? false : parent::newFile( $title, $time );
}
120 
/**
 * Check the remote repo for the existence of a batch of files.
 *
 * Answers already held in $this->mFileExists are reused; virtual URLs and
 * mwstore:// storage paths are reported as non-existent without asking the
 * remote wiki. Everything else is resolved with one imageinfo query.
 *
 * @param string[] $files Titles to check; array keys are preserved
 * @return array Map of input key => bool. A key is null when the remote
 *   response did not mention that title, and is absent altogether when the
 *   remote query returned no page list.
 */
public function fileExistsBatch( array $files ) {
	$results = [];
	foreach ( $files as $k => $f ) {
		if ( isset( $this->mFileExists[$f] ) ) {
			$results[$k] = $this->mFileExists[$f];
			unset( $files[$k] );
		} elseif ( self::isVirtualUrl( $f ) ) {
			# @todo FIXME: We need to be able to handle virtual
			# URLs better, at least when we know they refer to the
			# same repo.
			$results[$k] = false;
			unset( $files[$k] );
		} elseif ( FileBackend::isStoragePath( $f ) ) {
			$results[$k] = false;
			unset( $files[$k] );
			wfWarn( "Got mwstore:// path '$f'." );
		}
	}

	if ( !$files ) {
		// Everything was answered locally: do not send a query with an
		// empty 'titles' list to the remote wiki.
		return $results;
	}

	$data = $this->fetchImageQuery( [
		'titles' => implode( '|', $files ),
		'prop' => 'imageinfo',
	] );

	if ( !isset( $data['query']['pages'] ) ) {
		return $results;
	}

	# First, get results from the query. Note we only care whether the image exists,
	# not whether it has a description page.
	foreach ( $data['query']['pages'] as $p ) {
		if ( !isset( $p['title'] ) ) {
			// Entry without a usable title; nothing to key the result on
			continue;
		}
		$this->mFileExists[$p['title']] = ( ( $p['imagerepository'] ?? '' ) !== '' );
	}

	# Second, copy the results to any redirects that were queried
	foreach ( $data['query']['redirects'] ?? [] as $r ) {
		if ( isset( $this->mFileExists[$r['to']] ) ) {
			$this->mFileExists[$r['from']] = $this->mFileExists[$r['to']];
		}
	}

	# Third, copy the results to any non-normalized titles that were queried.
	# This must run after the redirect pass: a normalized title may itself
	# be a redirect source.
	foreach ( $data['query']['normalized'] ?? [] as $n ) {
		if ( isset( $this->mFileExists[$n['to']] ) ) {
			$this->mFileExists[$n['from']] = $this->mFileExists[$n['to']];
		}
	}

	# Finally, copy the results to the output. Titles the response did not
	# cover stay null ("unknown") instead of raising an undefined-index notice.
	foreach ( $files as $key => $file ) {
		$results[$key] = $this->mFileExists[$file] ?? null;
	}

	return $results;
}
175 
/**
 * Always reports an empty property set; the argument is ignored.
 *
 * @param string $virtualUrl Unused
 * @return array Always an empty array
 */
public function getFileProps( $virtualUrl ) {
	$noProps = [];

	return $noProps;
}
183 
/**
 * Run an action=query request against the remote API (through the cached
 * HTTP layer) and decode the JSON answer.
 *
 * @param array $query API parameters. 'format', 'action' and 'redirects'
 *   are always overwritten; 'uselang' defaults to the site language.
 * @return array|null Decoded response (associative), or null when the
 *   request produced no data
 */
public function fetchImageQuery( $query ) {
	global $wgLanguageCode;

	// These are forced: array_merge() lets them win over caller-supplied keys
	$forced = [
		'format' => 'json',
		'action' => 'query',
		'redirects' => 'true'
	];
	$query = array_merge( $query, $forced );

	// uselang unset or null => fall back to the site language
	$query['uselang'] = $query['uselang'] ?? $wgLanguageCode;

	$response = $this->httpGetCached( 'Metadata', $query );
	if ( !$response ) {
		return null;
	}

	return FormatJson::decode( $response, true );
}
210 
/**
 * Extract the first imageinfo record from a decoded query response.
 *
 * @param array|null $data Response as returned by fetchImageQuery()
 * @return array|bool The first page's imageinfo[0] entry, with 'pageid'
 *   copied in when the page has one; false if no page carries imageinfo
 */
public function getImageInfo( $data ) {
	if ( !$data || !isset( $data['query']['pages'] ) ) {
		return false;
	}

	foreach ( $data['query']['pages'] as $page ) {
		if ( !isset( $page['imageinfo'][0] ) ) {
			continue;
		}

		$imageInfo = $page['imageinfo'][0];
		if ( isset( $page['pageid'] ) ) {
			$imageInfo['pageid'] = $page['pageid'];
		}

		return $imageInfo;
	}

	return false;
}
230 
/**
 * Look up remote files by content hash via list=allimages.
 *
 * @param string $hash Value sent as the 'aisha1base36' API parameter
 * @return ForeignAPIFile[] One file object per returned image that has a
 *   'name' attribute; empty when nothing matched or the query failed
 */
public function findBySha1( $hash ) {
	$response = $this->fetchImageQuery( [
		'aisha1base36' => $hash,
		'aiprop' => ForeignAPIFile::getProps(),
		'list' => 'allimages',
	] );

	$files = [];
	foreach ( $response['query']['allimages'] ?? [] as $img ) {
		// 1.14 was broken, doesn't return name attribute
		if ( isset( $img['name'] ) ) {
			$files[] = new ForeignAPIFile( Title::makeTitle( NS_FILE, $img['name'] ), $this, $img );
		}
	}

	return $files;
}
254 
/**
 * Ask the remote API for the URL of a thumbnail.
 *
 * @param string $name File name, without the 'File:' prefix
 * @param int $width Sent as iiurlwidth
 * @param int $height Sent as iiurlheight
 * @param array|null &$result Receives the full imageinfo record on success
 * @param string $otherParams Sent as iiurlparam
 * @return string|bool Remote thumbnail URL, or false if none was returned
 */
private function getThumbUrl(
	$name, $width = -1, $height = -1, &$result = null, $otherParams = ''
) {
	$query = [
		'titles' => 'File:' . $name,
		'iiprop' => self::getIIProps(),
		'iiurlwidth' => $width,
		'iiurlheight' => $height,
		'iiurlparam' => $otherParams,
		'prop' => 'imageinfo'
	];
	$data = $this->fetchImageQuery( $query );
	$info = $this->getImageInfo( $data );

	if ( !$data || !$info || !isset( $info['thumburl'] ) ) {
		return false;
	}

	wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] );
	$result = $info;

	return $info['thumburl'];
}
285 
/**
 * Ask the remote API why a thumbnail could not be produced.
 *
 * @param string $name File name, without the 'File:' prefix
 * @param int $width Sent as iiurlwidth
 * @param int $height Sent as iiurlheight
 * @param string $otherParams Sent as iiurlparam
 * @param string|null $lang Sent as uselang; null lets fetchImageQuery()
 *   substitute the site language
 * @return MediaTransformError|bool Error object wrapping the remote
 *   'thumberror' text, or false if the response carried no such error
 */
public function getThumbError(
	$name, $width = -1, $height = -1, $otherParams = '', $lang = null
) {
	$query = [
		'titles' => 'File:' . $name,
		'iiprop' => self::getIIProps(),
		'iiurlwidth' => $width,
		'iiurlheight' => $height,
		'iiurlparam' => $otherParams,
		'prop' => 'imageinfo',
		'uselang' => $lang,
	];
	$data = $this->fetchImageQuery( $query );
	$info = $this->getImageInfo( $data );

	if ( !$data || !$info || !isset( $info['thumberror'] ) ) {
		return false;
	}

	wfDebug( __METHOD__ . " got remote thumb error " . $info['thumberror'] );

	// The last argument is the already parsed message from the foreign repo
	return new MediaTransformError(
		'thumbnail_error_remote',
		$width,
		$height,
		$this->getDisplayName(),
		$info['thumberror']
	);
}
323 
/**
 * Return the image URL from cache if possible.
 *
 * When thumbnail caching is disabled this simply returns the remote
 * thumbnail URL. Otherwise the known-URL map in the WAN cache is consulted
 * first; on a miss the thumbnail is fetched from the remote wiki, stored in
 * the local 'thumb' zone and its local URL is remembered.
 *
 * @param string $name File name, without the 'File:' prefix
 * @param int $width
 * @param int $height
 * @param string $params Other rendering parameters (sent as iiurlparam)
 * @return string|bool Local thumbnail URL; the remote URL if the local copy
 *   could not be written (or caching is off); false on failure
 */
public function getThumbUrlFromCache( $name, $width, $height, $params = "" ) {
	// We can't check the local cache using FileRepo functions because
	// we override fileExistsBatch(). We have to use the FileBackend directly.
	$backend = $this->getBackend(); // convenience

	if ( !$this->canCacheThumbs() ) {
		$result = null; // can't pass "null" by reference, but it's ok as default value
		return $this->getThumbUrl( $name, $width, $height, $result, $params );
	}
	$key = $this->getLocalCacheKey( 'ForeignAPIRepo', 'ThumbUrl', $name );
	$sizekey = "$width:$height:$params";

	/* Get the array of urls that we already know */
	$knownThumbUrls = $this->wanCache->get( $key );
	if ( !$knownThumbUrls ) {
		/* No knownThumbUrls for this file */
		$knownThumbUrls = [];
	} elseif ( isset( $knownThumbUrls[$sizekey] ) ) {
		wfDebug( __METHOD__ . ': Got thumburl from local cache: ' .
			"{$knownThumbUrls[$sizekey]}" );

		return $knownThumbUrls[$sizekey];
	}

	$metadata = null;
	$foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata, $params );

	if ( !$foreignUrl ) {
		wfDebug( __METHOD__ . " Could not find thumburl" );

		return false;
	}

	// We need the same filename as the remote one :)
	$fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
	if ( !$this->validateFilename( $fileName ) ) {
		wfDebug( __METHOD__ . " The deduced filename $fileName is not safe" );

		return false;
	}
	$localPath = $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
	$localFilename = $localPath . "/" . $fileName;
	$localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) .
		rawurlencode( $name ) . "/" . rawurlencode( $fileName );

	if ( $backend->fileExists( [ 'src' => $localFilename ] )
		&& isset( $metadata['timestamp'] )
	) {
		wfDebug( __METHOD__ . " Thumbnail was already downloaded before" );
		$localTimestamp = $backend->getFileTimestamp( [ 'src' => $localFilename ] );
		if ( $localTimestamp !== false ) {
			// NOTE(review): FileBackend::getFileTimestamp() is understood to
			// return a TS_MW string rather than UNIX seconds - confirm against
			// its documentation. wfTimestamp( TS_UNIX, ... ) normalises either
			// form, so the arithmetic below is done in one unit.
			$modified = (int)wfTimestamp( TS_UNIX, $localTimestamp );
			$remoteModified = strtotime( $metadata['timestamp'] );
			$diff = abs( $modified - time() );
			if ( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
				/* Use our current and already downloaded thumbnail */
				$knownThumbUrls[$sizekey] = $localUrl;
				$this->wanCache->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );

				return $localUrl;
			}
		}
		/* There is a new Commons file, or existing thumbnail older than a month */
	}

	$thumb = self::httpGet( $foreignUrl, 'default', [], $mtime );
	if ( !$thumb ) {
		wfDebug( __METHOD__ . " Could not download thumb" );

		return false;
	}

	# @todo FIXME: Delete old thumbs that aren't being used. Maintenance script?
	$backend->prepare( [ 'dir' => dirname( $localFilename ) ] );
	// Separate variable: do not overwrite the $params string argument
	$createParams = [ 'dst' => $localFilename, 'content' => $thumb ];
	if ( !$backend->quickCreate( $createParams )->isOK() ) {
		wfDebug( __METHOD__ . " could not write to thumb path '$localFilename'" );

		return $foreignUrl;
	}
	$knownThumbUrls[$sizekey] = $localUrl;

	// Without a Last-Modified time from the download, fall back to the
	// fixed expiry (the same TTL used for the reuse path above).
	$ttl = $mtime
		? $this->wanCache->adaptiveTTL( $mtime, $this->apiThumbCacheExpiry )
		: $this->apiThumbCacheExpiry;
	$this->wanCache->set( $key, $knownThumbUrls, $ttl );
	wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache" );

	return $localUrl;
}
425 
/**
 * Get the URL corresponding to one of the zones.
 *
 * @param string $zone 'public' and 'thumb' are answered from this repo's
 *   own url/thumbUrl; anything else is delegated to the parent
 * @param string|null $ext Only forwarded to the parent implementation
 * @return mixed
 */
public function getZoneUrl( $zone, $ext = null ) {
	// Loose comparison on purpose: it mirrors the matching rules of a switch
	if ( $zone == 'public' ) {
		return $this->url;
	}
	if ( $zone == 'thumb' ) {
		return $this->thumbUrl;
	}

	return parent::getZoneUrl( $zone, $ext );
}
442 
/**
 * Get the local directory corresponding to one of the basic zones.
 *
 * @param string $zone Only 'public' and 'thumb' are supported
 * @return mixed The parent's zone path for a supported zone, false otherwise
 */
public function getZonePath( $zone ) {
	$supported = [ 'public', 'thumb' ];
	// Strict matching: with loose comparison non-string values such as
	// true (or 0 before PHP 8) would pass as a supported zone.
	if ( in_array( $zone, $supported, true ) ) {
		return parent::getZonePath( $zone );
	}

	return false;
}
456 
/**
 * Are we locally caching the thumbnails?
 *
 * @return bool True when the thumbnail cache expiry is greater than zero
 */
public function canCacheThumbs() {
	$expiry = $this->apiThumbCacheExpiry;

	return $expiry > 0;
}
464 
/**
 * The user agent the ForeignAPIRepo will use: the standard one followed by
 * a ForeignAPIRepo/<VERSION> token.
 *
 * @return string
 */
public static function getUserAgent() {
	$baseAgent = Http::userAgent();

	return $baseAgent . " ForeignAPIRepo/" . self::VERSION;
}
472 
/**
 * Get information about the repo - overrides/extends the parent class's
 * information with the API URL and, when the remote siteinfo query
 * succeeds, the remote article path, server and favicon.
 *
 * @return array
 */
public function getInfo() {
	$info = parent::getInfo();
	$info['apiurl'] = $this->getApiUrl();

	$query = [
		'format' => 'json',
		'action' => 'query',
		'meta' => 'siteinfo',
		'siprop' => 'general',
	];

	$data = $this->httpGetCached( 'SiteInfo', $query, 7200 );
	if ( !$data ) {
		return $info;
	}

	$siteInfo = FormatJson::decode( $data, true );
	$general = $siteInfo['query']['general'] ?? null;
	if ( !is_array( $general ) ) {
		// Undecodable JSON or a response without query.general (e.g. an
		// API error payload): keep what we have instead of raising notices.
		return $info;
	}

	// The keys stay present, as before, even if the remote side omitted a value
	$info['articlepath'] = $general['articlepath'] ?? null;
	$info['server'] = $general['server'] ?? null;

	if ( isset( $general['favicon'] ) ) {
		$info['favicon'] = $general['favicon'];
	}

	return $info;
}
506 
/**
 * Like a HttpRequestFactory::get request, but with custom User-Agent.
 *
 * @param string $url URL to fetch
 * @param string|int|null $timeout Request timeout; null means 'default'
 * @param array $options Extra request options; 'timeout' and 'method' are
 *   always overwritten
 * @param int|bool &$mtime Receives the response's Last-Modified time as a
 *   UNIX timestamp on success, or false if the header is absent. Left
 *   untouched when the request fails.
 * @return string|bool Response body, or false on failure (which is logged
 *   to the 'http' channel)
 */
public static function httpGet(
	$url, $timeout = 'default', $options = [], &$mtime = false
) {
	// A null timeout falls back to 'default'
	$options['timeout'] = $timeout ?? 'default';
	$options['method'] = "GET";

	/* Http::get */
	// Make protocol-relative or local URLs fully qualified before requesting
	$url = wfExpandUrl( $url, PROTO_HTTP );
	wfDebug( "ForeignAPIRepo: HTTP GET: $url" );

	$req = MWHttpRequest::factory( $url, $options, __METHOD__ );
	$req->setUserAgent( self::getUserAgent() );
	$status = $req->execute();

	if ( !$status->isOK() ) {
		$logger = LoggerFactory::getInstance( 'http' );
		$logger->warning(
			$status->getWikiText( false, false, 'en' ),
			[ 'caller' => 'ForeignAPIRepo::httpGet' ]
		);

		return false;
	}

	$lmod = $req->getResponseHeader( 'Last-Modified' );
	$mtime = $lmod ? wfTimestamp( TS_UNIX, $lmod ) : false;

	return $req->getContent();
}
549 
/**
 * @return string The entries of self::$imageInfoProps joined with '|',
 *   as used for the iiprop API parameter
 */
protected static function getIIProps() {
	$props = self::$imageInfoProps;

	return implode( '|', $props );
}
557 
/**
 * HTTP GET request to a mediawiki API (with caching)
 *
 * @param string $target Used in the cache key to distinguish request kinds
 * @param array $query API query parameters
 * @param int $cacheTTL Base time to live, in seconds
 * @return string|null Response body; null when the request failed
 */
public function httpGetCached( $target, $query, $cacheTTL = 3600 ) {
	$url = $this->mApiBase
		? wfAppendQuery( $this->mApiBase, $query )
		: $this->makeUrl( $query, 'api' );

	$cacheKey = $this->getLocalCacheKey( static::class, $target, md5( $url ) );

	$fetch = function ( $curValue, &$ttl ) use ( $url ) {
		$body = self::httpGet( $url, 'default', [], $mtime );

		if ( $body === false ) {
			// Failed request: the TTL is still derived via adaptiveTTL()
			$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );

			return null; // caches negatives
		}

		if ( $mtime ) {
			// Last-Modified is known: let it shape the TTL
			$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );
		}

		return $body;
	};

	return $this->wanCache->getWithSetCallback(
		$cacheKey,
		$cacheTTL,
		$fetch,
		[ 'pcGroup' => 'http-get:3', 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
589 
/**
 * File enumeration is not available for this repo type.
 *
 * @param callable $callback Unused
 * @throws MWException Always
 */
public function enumFiles( $callback ) {
	$message = 'enumFiles is not supported by ' . static::class;

	throw new MWException( $message );
}
597 
/**
 * This repo is read-only, so the assertion fails unconditionally.
 *
 * @throws MWException Always
 */
protected function assertWritableRepo() {
	$message = static::class . ': write operations are not supported.';

	throw new MWException( $message );
}
604 }
ForeignAPIRepo\getApiUrl
getApiUrl()
Definition: ForeignAPIRepo.php:101
ForeignAPIRepo\findBySha1
findBySha1( $hash)
Definition: ForeignAPIRepo.php:235
ForeignAPIRepo\httpGet
static httpGet( $url, $timeout='default', $options=[], &$mtime=false)
Like a HttpRequestFactory::get request, but with custom User-Agent.
Definition: ForeignAPIRepo.php:517
MediaTransformError
Basic media transform error class.
Definition: MediaTransformError.php:31
ForeignAPIRepo\$apiThumbCacheExpiry
int $apiThumbCacheExpiry
Check back with Commons after this expiry.
Definition: ForeignAPIRepo.php:58
ForeignAPIRepo\getUserAgent
static getUserAgent()
The user agent the ForeignAPIRepo will use.
Definition: ForeignAPIRepo.php:469
FileRepo\validateFilename
validateFilename( $filename)
Determine if a relative path is valid.
Definition: FileRepo.php:1719
$lang
if(!isset( $args[0])) $lang
Definition: testCompression.php:37
FileRepo\makeUrl
makeUrl( $query='', $entry='index')
Make an url to this repo.
Definition: FileRepo.php:769
ForeignAPIRepo\$fileCacheExpiry
int $fileCacheExpiry
Redownload thumbnail files after this expiry.
Definition: ForeignAPIRepo.php:61
ForeignAPIRepo\$fileFactory
$fileFactory
Definition: ForeignAPIRepo.php:56
$wgLocalFileRepo
$wgLocalFileRepo
File repository structures.
Definition: DefaultSettings.php:619
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1808
Http\userAgent
static userAgent()
A standard user-agent we can use for external requests.
Definition: Http.php:97
FileRepo\$thumbUrl
string $thumbUrl
The base thumbnail URL.
Definition: FileRepo.php:108
NS_FILE
const NS_FILE
Definition: Defines.php:75
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
ForeignAPIRepo\__construct
__construct( $info)
Definition: ForeignAPIRepo.php:72
ForeignAPIRepo\getThumbUrlFromCache
getThumbUrlFromCache( $name, $width, $height, $params="")
Return the imageurl from cache if possible.
Definition: ForeignAPIRepo.php:337
FileRepo\$backend
FileBackend $backend
Definition: FileRepo.php:64
ForeignAPIRepo\assertWritableRepo
assertWritableRepo()
Definition: ForeignAPIRepo.php:601
ForeignAPIRepo\getImageInfo
getImageInfo( $data)
Definition: ForeignAPIRepo.php:215
FileRepo
Base class for file repositories.
Definition: FileRepo.php:41
wfAppendQuery
wfAppendQuery( $url, $query)
Append a query string to an existing URL, which may or may not already have query string parameters.
Definition: GlobalFunctions.php:438
FormatJson\decode
static decode( $value, $assoc=false)
Decodes a JSON string.
Definition: FormatJson.php:174
MWException
MediaWiki exception.
Definition: MWException.php:29
FileRepo\getLocalCacheKey
getLocalCacheKey(... $args)
Get a key for this repo in the local cache domain.
Definition: FileRepo.php:1875
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
FileBackend\getFileTimestamp
getFileTimestamp(array $params)
Get the last-modified timestamp of the file at a storage path.
FileBackend\isStoragePath
static isStoragePath( $path)
Check if a given path is a "mwstore://" path.
Definition: FileBackend.php:1528
ForeignAPIRepo\$mFileExists
array $mFileExists
Definition: ForeignAPIRepo.php:64
ForeignAPIRepo\getFileProps
getFileProps( $virtualUrl)
Definition: ForeignAPIRepo.php:180
$title
$title
Definition: testCompression.php:38
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:592
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:909
FileRepo\getDisplayName
getDisplayName()
Get the human-readable name of the repo.
Definition: FileRepo.php:1818
ForeignAPIRepo\getThumbUrl
getThumbUrl( $name, $width=-1, $height=-1, &$result=null, $otherParams='')
Definition: ForeignAPIRepo.php:264
$wgLanguageCode
$wgLanguageCode
Site language code.
Definition: DefaultSettings.php:3102
FileBackend\fileExists
fileExists(array $params)
Check if a file exists at a storage path in the backend.
PROTO_HTTP
const PROTO_HTTP
Definition: Defines.php:208
ForeignAPIRepo\fileExistsBatch
fileExistsBatch(array $files)
Definition: ForeignAPIRepo.php:125
FileBackend\quickCreate
quickCreate(array $params, array $opts=[])
Performs a single quick create operation.
Definition: FileBackend.php:765
FileBackend\prepare
prepare(array $params)
Prepare a storage directory for usage.
Definition: FileBackend.php:876
FileRepo\getBackend
getBackend()
Get the file backend instance.
Definition: FileRepo.php:244
ForeignAPIRepo\$imageInfoProps
static $imageInfoProps
List of iiprop values for the thumbnail fetch queries.
Definition: ForeignAPIRepo.php:51
ForeignAPIRepo\getIIProps
static getIIProps()
Definition: ForeignAPIRepo.php:554
ForeignAPIRepo\getZoneUrl
getZoneUrl( $zone, $ext=null)
Definition: ForeignAPIRepo.php:432
FileRepo\getHashPath
getHashPath( $name)
Get a relative path including trailing slash.
Definition: FileRepo.php:708
FileRepo\$name
string $name
Definition: FileRepo.php:152
ForeignAPIRepo\enumFiles
enumFiles( $callback)
Definition: ForeignAPIRepo.php:594
ForeignAPIRepo\VERSION
const VERSION
Definition: ForeignAPIRepo.php:45
ForeignAPIRepo
A foreign repository with a remote MediaWiki with an API thingy.
Definition: ForeignAPIRepo.php:41
ForeignAPIRepo\newFile
newFile( $title, $time=false)
Per docs in FileRepo, this needs to return false if we don't support versioned files.
Definition: ForeignAPIRepo.php:113
ForeignAPIRepo\httpGetCached
httpGetCached( $target, $query, $cacheTTL=3600)
HTTP GET request to a mediawiki API (with caching)
Definition: ForeignAPIRepo.php:565
ForeignAPIRepo\getZonePath
getZonePath( $zone)
Get the local directory corresponding to one of the basic zones.
Definition: ForeignAPIRepo.php:448
ForeignAPIFile\getProps
static getProps()
Get the property string for iiprop and aiprop.
Definition: ForeignAPIFile.php:96
$ext
if(!is_readable( $file)) $ext
Definition: router.php:48
wfWarn
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
Definition: GlobalFunctions.php:1073
ForeignAPIRepo\$mApiBase
string $mApiBase
Definition: ForeignAPIRepo.php:67
ForeignAPIFile
Foreign file accessible through api.php requests.
Definition: ForeignAPIFile.php:32
ForeignAPIRepo\getThumbError
getThumbError( $name, $width=-1, $height=-1, $otherParams='', $lang=null)
Definition: ForeignAPIRepo.php:295
ForeignAPIRepo\getInfo
getInfo()
Get information about the repo - overrides/extends the parent class's information.
Definition: ForeignAPIRepo.php:479
ForeignAPIRepo\canCacheThumbs
canCacheThumbs()
Are we locally caching the thumbnails?
Definition: ForeignAPIRepo.php:461
FileRepo\$url
string false $url
Public zone URL.
Definition: FileRepo.php:105
ForeignAPIRepo\fetchImageQuery
fetchImageQuery( $query)
Definition: ForeignAPIRepo.php:188
MWHttpRequest\factory
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
Definition: MWHttpRequest.php:195
wfExpandUrl
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
Definition: GlobalFunctions.php:490