MediaWiki master
ForeignAPIRepo.php
Go to the documentation of this file.
1<?php
7namespace MediaWiki\FileRepo;
8
9use LogicException;
20use RuntimeException;
23use Wikimedia\Timestamp\ConvertibleTimestamp;
24use Wikimedia\Timestamp\TimestampFormat as TS;
25
43 /* This version string is used in the user agent for requests and will help
44 * server maintainers in identifying ForeignAPI usage.
45 * Update the version every time you make breaking or significant changes. */
46 private const VERSION = "2.1";
47
/**
 * Image-info property names. getIIProps() joins them with '|' and the
 * result is sent as the 'iiprop' parameter of thumbnail queries.
 */
51 private const IMAGE_INFO_PROPS = [
52 'url',
53 'timestamp',
54 ];
55
/**
 * Callable pointing at ForeignAPIFile::newFromTitle.
 * NOTE(review): presumably consumed by the parent repo when creating file objects - confirm.
 */
57 protected $fileFactory = [ ForeignAPIFile::class, 'newFromTitle' ];
/**
 * Check back with Commons after this expiry (seconds). Overridable through
 * $info['apiThumbCacheExpiry']; a value of 0 or less disables local thumb caching
 * (see canCacheThumbs()).
 */
59 protected $apiThumbCacheExpiry = 24 * 3600; // 1 day
60
/** Redownload thumbnail files after this expiry (seconds). Overridable through $info['fileCacheExpiry']. */
62 protected $fileCacheExpiry = 30 * 24 * 3600; // 1 month
63
/** API metadata cache time (seconds). Overridable through $info['apiMetadataExpiry']. */
75 protected $apiMetadataExpiry = 4 * 3600; // 4 hours
76
/** Existence results remembered by fileExistsBatch(), keyed by the title string that was queried. */
78 protected $mFileExists = [];
79
/** Value of $info['apibase'] (the foreign api.php URL), or null when that key is not configured. */
81 private $mApiBase;
82
/** Optional text from $info['userAgent']; getUserAgent() appends it to the generated user agent. */
84 private $userAgent;
85
/**
 * Configure the repo from its settings array.
 *
 * Keys read here: 'apibase', 'apiThumbCacheExpiry', 'fileCacheExpiry',
 * 'apiMetadataExpiry' and 'userAgent'. The whole array is also passed to
 * the parent constructor.
 *
 * @param array $info Repo configuration
 */
public function __construct( $info ) {
	$localFileRepo = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::LocalFileRepo );
	parent::__construct( $info );

	// https://commons.wikimedia.org/w/api.php
	$this->mApiBase = $info['apibase'] ?? null;

	if ( isset( $info['apiThumbCacheExpiry'] ) ) {
		$this->apiThumbCacheExpiry = $info['apiThumbCacheExpiry'];
	}
	if ( isset( $info['fileCacheExpiry'] ) ) {
		$this->fileCacheExpiry = $info['fileCacheExpiry'];
	}
	if ( isset( $info['apiMetadataExpiry'] ) ) {
		$this->apiMetadataExpiry = $info['apiMetadataExpiry'];
	}
	if ( isset( $info['userAgent'] ) ) {
		$this->userAgent = $info['userAgent'];
	}
	if ( !$this->scriptDirUrl ) {
		// hack for description fetches.
		// The cast keeps the result for a missing 'apibase' (an empty string)
		// without passing null to dirname(), which PHP 8.1 deprecates.
		$this->scriptDirUrl = dirname( (string)$this->mApiBase );
	}
	// If we can cache thumbs we can guess sensible defaults for these
	if ( $this->canCacheThumbs() && !$this->url ) {
		$this->url = $localFileRepo['url'];
	}
	if ( $this->canCacheThumbs() && !$this->thumbUrl ) {
		$this->thumbUrl = $this->url . '/thumb';
	}
}
121
/**
 * Per docs in FileRepo, this needs to return false if we don't support
 * versioned files.
 *
 * @param mixed $title Passed through to the parent implementation
 * @param mixed $time Any truthy value requests a specific version
 * @return mixed False when $time is truthy, otherwise the parent's result
 */
public function newFile( $title, $time = false ) {
	// A truthy $time asks for an old version, which is refused here.
	return $time ? false : parent::newFile( $title, $time );
}
137
/**
 * Check whether the given files exist in the foreign repo.
 *
 * Answers are remembered in $this->mFileExists and reused on later calls.
 * Virtual URLs and mwstore:// storage paths are reported as not existing
 * without asking the foreign API.
 *
 * @param string[] $files Titles to check; the keys are preserved in the result
 * @return array Map of input key => bool. A title the API response does not
 *  mention maps to null; if the API returns no page list at all, only the
 *  locally answered keys are present.
 */
public function fileExistsBatch( array $files ) {
	$results = [];
	foreach ( $files as $k => $f ) {
		if ( isset( $this->mFileExists[$f] ) ) {
			$results[$k] = $this->mFileExists[$f];
			unset( $files[$k] );
		} elseif ( self::isVirtualUrl( $f ) ) {
			# @todo FIXME: We need to be able to handle virtual
			# URLs better, at least when we know they refer to the
			# same repo.
			$results[$k] = false;
			unset( $files[$k] );
		} elseif ( FileBackend::isStoragePath( $f ) ) {
			$results[$k] = false;
			unset( $files[$k] );
			wfWarn( "Got mwstore:// path '$f'." );
		}
	}

	if ( !$files ) {
		// Everything was answered locally, so skip the remote query entirely.
		return $results;
	}

	$data = $this->fetchImageQuery( [
		'titles' => implode( '|', $files ),
		'prop' => 'imageinfo'
	] );

	if ( isset( $data['query']['pages'] ) ) {
		# First, get results from the query. Note we only care whether the image exists,
		# not whether it has a description page.
		foreach ( $data['query']['pages'] as $p ) {
			if ( !isset( $p['title'] ) ) {
				continue;
			}
			# A page entry without 'imagerepository' is not a known file
			$this->mFileExists[$p['title']] = ( ( $p['imagerepository'] ?? '' ) !== '' );
		}
		# Second, copy the results to any redirects that were queried
		foreach ( $data['query']['redirects'] ?? [] as $r ) {
			if ( isset( $this->mFileExists[$r['to']] ) ) {
				$this->mFileExists[$r['from']] = $this->mFileExists[$r['to']];
			}
		}
		# Third, copy the results to any non-normalized titles that were queried
		foreach ( $data['query']['normalized'] ?? [] as $n ) {
			if ( isset( $this->mFileExists[$n['to']] ) ) {
				$this->mFileExists[$n['from']] = $this->mFileExists[$n['to']];
			}
		}
		# Finally, copy the results to the output
		foreach ( $files as $key => $file ) {
			$results[$key] = $this->mFileExists[$file] ?? null;
		}
	}

	return $results;
}
192
/**
 * File properties are not provided by this repo: the result is always an
 * empty array, whatever $virtualUrl is.
 *
 * @param mixed $virtualUrl Ignored
 * @return array Always empty
 */
public function getFileProps( $virtualUrl ) {
	return [];
}
200
/**
 * Make an API query in the foreign repo, caching results.
 *
 * 'format', 'action' and 'redirects' are always forced to 'json', 'query'
 * and 'true'. 'uselang' defaults to the configured language code when the
 * caller leaves it unset or null.
 *
 * @param array $query API parameters
 * @return array|null Decoded response, or null when nothing could be fetched.
 *  NOTE(review): presumably also null when the body is not valid JSON - confirm
 *  against FormatJson::decode().
 */
public function fetchImageQuery( $query ) {
	$languageCode = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::LanguageCode );

	$query = array_merge( $query,
		[
			'format' => 'json',
			'action' => 'query',
			'redirects' => 'true'
		] );

	// uselang is unset or null
	$query['uselang'] ??= $languageCode;

	$data = $this->httpGetCached( 'Metadata', $query, $this->apiMetadataExpiry );

	return $data ? FormatJson::decode( $data, true ) : null;
}
230
/**
 * Pull the first image-info record out of a decoded API response.
 *
 * Pages are scanned in order; the first one carrying an 'imageinfo' entry
 * at index 0 wins. Its 'pageid', when present, is copied into the record.
 *
 * @param array|null|false $data Decoded API response
 * @return array|false The image-info record, or false if none was found
 */
public function getImageInfo( $data ) {
	if ( !$data || !isset( $data['query']['pages'] ) ) {
		return false;
	}

	foreach ( $data['query']['pages'] as $page ) {
		$record = $page['imageinfo'][0] ?? null;
		if ( $record === null ) {
			continue;
		}
		if ( isset( $page['pageid'] ) ) {
			$record['pageid'] = $page['pageid'];
		}

		return $record;
	}

	return false;
}
250
/**
 * Look up foreign files by content hash using the 'allimages' API list.
 *
 * @param string $hash Sent as the 'aisha1base36' parameter
 * @return ForeignAPIFile[] One object per returned image that has a name
 */
public function findBySha1( $hash ) {
	$response = $this->fetchImageQuery( [
		'aisha1base36' => $hash,
		'aiprop' => ForeignAPIFile::getProps(),
		'list' => 'allimages',
	] );

	$files = [];
	foreach ( $response['query']['allimages'] ?? [] as $image ) {
		// 1.14 was broken, doesn't return name attribute
		if ( isset( $image['name'] ) ) {
			$title = Title::makeTitle( NS_FILE, $image['name'] );
			$files[] = new ForeignAPIFile( $title, $this, $image );
		}
	}

	return $files;
}
274
/**
 * Ask the foreign API for the URL of a thumbnail.
 *
 * @param string $name File name without the 'File:' prefix
 * @param int $width Sent as 'iiurlwidth'
 * @param int $height Sent as 'iiurlheight'
 * @param array|null &$result Receives the whole image-info record on success
 * @param string $otherParams Sent as 'iiurlparam'
 * @return string|false The remote thumbnail URL, or false if none was returned
 */
private function getThumbUrl(
	$name, $width = -1, $height = -1, &$result = null, $otherParams = ''
) {
	$request = [
		'titles' => 'File:' . $name,
		'iiprop' => self::getIIProps(),
		'iiurlwidth' => $width,
		'iiurlheight' => $height,
		'iiurlparam' => $otherParams,
		'prop' => 'imageinfo'
	];
	$data = $this->fetchImageQuery( $request );
	$info = $this->getImageInfo( $data );

	if ( !$data || !$info || !isset( $info['thumburl'] ) ) {
		return false;
	}

	wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] );
	$result = $info;

	return $info['thumburl'];
}
305
/**
 * Ask the foreign API why a thumbnail could not be rendered.
 *
 * @param string $name File name without the 'File:' prefix
 * @param int $width Sent as 'iiurlwidth'
 * @param int $height Sent as 'iiurlheight'
 * @param string $otherParams Sent as 'iiurlparam'
 * @param string|null $lang Sent as 'uselang'
 * @return MediaTransformError|false Error wrapping the remote message, or
 *  false when the response carries no 'thumberror'
 */
public function getThumbError(
	$name, $width = -1, $height = -1, $otherParams = '', $lang = null
) {
	$request = [
		'titles' => 'File:' . $name,
		'iiprop' => self::getIIProps(),
		'iiurlwidth' => $width,
		'iiurlheight' => $height,
		'iiurlparam' => $otherParams,
		'prop' => 'imageinfo',
		'uselang' => $lang,
	];
	$data = $this->fetchImageQuery( $request );
	$info = $this->getImageInfo( $data );

	if ( !$data || !$info || !isset( $info['thumberror'] ) ) {
		return false;
	}

	wfDebug( __METHOD__ . " got remote thumb error " . $info['thumberror'] );

	// already parsed message from foreign repo
	$remoteMessage = $info['thumberror'];

	return new MediaTransformError(
		'thumbnail_error_remote',
		$width,
		$height,
		$this->getDisplayName(),
		$remoteMessage
	);
}
343
/**
 * Return the imageurl from cache if possible.
 *
 * With local thumbnail caching disabled this just returns the remote
 * thumbnail URL. Otherwise the thumbnail is downloaded into the local
 * 'thumb' zone and the local URL is remembered in the WAN cache, keyed by
 * file name and by "$width:$height:$params".
 *
 * @param string $name File name
 * @param int $width
 * @param int $height
 * @param string $params Other rendering parameters, sent as 'iiurlparam'
 * @return string|false Local thumbnail URL; the remote URL if the thumbnail
 *  could not be stored locally; false if it could not be found or fetched
 */
public function getThumbUrlFromCache( $name, $width, $height, $params = "" ) {
	// We can't check the local cache using FileRepo functions because
	// we override fileExistsBatch(). We have to use the FileBackend directly.
	$backend = $this->getBackend(); // convenience

	if ( !$this->canCacheThumbs() ) {
		$result = null; // can't pass "null" by reference, but it's ok as default value

		return $this->getThumbUrl( $name, $width, $height, $result, $params );
	}

	$key = $this->getLocalCacheKey( 'file-thumb-url', sha1( $name ) );
	$sizekey = "$width:$height:$params";

	/* Get the array of urls that we already know */
	$knownThumbUrls = $this->wanCache->get( $key );
	if ( !$knownThumbUrls ) {
		/* No knownThumbUrls for this file */
		$knownThumbUrls = [];
	} elseif ( isset( $knownThumbUrls[$sizekey] ) ) {
		wfDebug( __METHOD__ . ': Got thumburl from local cache: ' .
			"{$knownThumbUrls[$sizekey]}" );

		return $knownThumbUrls[$sizekey];
	}

	$metadata = null;
	$foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata, $params );

	if ( !$foreignUrl ) {
		wfDebug( __METHOD__ . " Could not find thumburl" );

		return false;
	}

	// We need the same filename as the remote one :)
	$fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
	if ( !$this->validateFilename( $fileName ) ) {
		wfDebug( __METHOD__ . " The deduced filename $fileName is not safe" );

		return false;
	}
	$localPath = $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
	$localFilename = $localPath . "/" . $fileName;
	$localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) .
		rawurlencode( $name ) . "/" . rawurlencode( $fileName );

	if ( $backend->fileExists( [ 'src' => $localFilename ] )
		&& isset( $metadata['timestamp'] )
	) {
		wfDebug( __METHOD__ . " Thumbnail was already downloaded before" );
		$modified = (int)wfTimestamp( TS::UNIX, $backend->getFileTimestamp( [ 'src' => $localFilename ] ) );
		$remoteModified = (int)wfTimestamp( TS::UNIX, $metadata['timestamp'] );
		$current = (int)ConvertibleTimestamp::now( TS::UNIX );
		$diff = abs( $modified - $current );
		if ( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
			/* Use our current and already downloaded thumbnail */
			$knownThumbUrls[$sizekey] = $localUrl;
			$this->wanCache->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );

			return $localUrl;
		}
		/* There is a new Commons file, or existing thumbnail older than a month */
	}

	$mtime = false;
	$thumb = $this->httpGet( $foreignUrl, 'default', [], $mtime );
	if ( !$thumb ) {
		wfDebug( __METHOD__ . " Could not download thumb" );

		return false;
	}

	# @todo FIXME: Delete old thumbs that aren't being used. Maintenance script?
	$backend->prepare( [ 'dir' => dirname( $localFilename ) ] );
	// Separate variable so the $params argument is not overwritten
	$createParams = [ 'dst' => $localFilename, 'content' => $thumb ];
	if ( !$backend->quickCreate( $createParams )->isOK() ) {
		wfDebug( __METHOD__ . " could not write to thumb path '$localFilename'" );

		return $foreignUrl;
	}
	$knownThumbUrls[$sizekey] = $localUrl;

	// Without a Last-Modified time there is nothing to adapt to, so use the
	// plain thumbnail cache expiry.
	$ttl = $mtime
		? $this->wanCache->adaptiveTTL( $mtime, $this->apiThumbCacheExpiry )
		: $this->apiThumbCacheExpiry;
	$this->wanCache->set( $key, $knownThumbUrls, $ttl );
	wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache" );

	return $localUrl;
}
447
/**
 * Get the URL for a zone.
 *
 * 'public' and 'thumb' are answered from this repo's own $url and
 * $thumbUrl; any other zone is delegated to the parent.
 *
 * @param string $zone
 * @param string|null $ext Only forwarded to the parent implementation
 * @return mixed
 */
public function getZoneUrl( $zone, $ext = null ) {
	// Loose comparison on purpose: it is what the original switch did.
	if ( $zone == 'public' ) {
		return $this->url;
	}
	if ( $zone == 'thumb' ) {
		return $this->thumbUrl;
	}

	return parent::getZoneUrl( $zone, $ext );
}
464
/**
 * Get the local directory corresponding to one of the basic zones.
 *
 * Only 'public' and 'thumb' are supported; every other zone yields false.
 *
 * @param string $zone
 * @return mixed The parent's path for a supported zone, otherwise false
 */
public function getZonePath( $zone ) {
	$isSupported = in_array( $zone, [ 'public', 'thumb' ] );

	return $isSupported ? parent::getZonePath( $zone ) : false;
}
478
/**
 * Are we locally caching the thumbnails?
 *
 * @return bool True when the thumbnail cache expiry is greater than zero
 */
public function canCacheThumbs() {
	$expiry = $this->apiThumbCacheExpiry;

	return $expiry > 0;
}
486
/**
 * The user agent the ForeignAPIRepo will use.
 *
 * Built from the HTTP request factory's user agent, the canonical server
 * URL, this class's VERSION, and the configured extra text if there is any.
 *
 * @return string
 */
public function getUserAgent() {
	$services = MediaWikiServices::getInstance();
	$baseAgent = $services->getHttpRequestFactory()->getUserAgent();
	$server = $services->getUrlUtils()->getCanonicalServer();
	$version = self::VERSION;

	$agent = "$baseAgent ($server) ForeignAPIRepo/$version";
	if ( $this->userAgent !== null ) {
		$agent .= ' ' . $this->userAgent;
	}

	return $agent;
}
498
/**
 * Get information about the repo - overrides/extends the parent class's information.
 *
 * Adds 'apiurl', plus 'articlepath', 'server' and (unless already set)
 * 'favicon' taken from the foreign wiki's general site info. The site info
 * keys are only added when the response could be fetched and decoded.
 *
 * @return array
 */
public function getInfo() {
	$info = parent::getInfo();
	$info['apiurl'] = $this->mApiBase;

	$query = [
		'format' => 'json',
		'action' => 'query',
		'meta' => 'siteinfo',
		'siprop' => 'general',
	];

	$data = $this->httpGetCached( 'SiteInfo', $query, 7200 );
	if ( !$data ) {
		return $info;
	}

	$siteInfo = FormatJson::decode( $data, true );
	// A body that does not decode, or that lacks the 'general' section,
	// must not be dereferenced.
	$general = $siteInfo['query']['general'] ?? null;
	if ( !is_array( $general ) ) {
		return $info;
	}

	if ( isset( $general['articlepath'] ) ) {
		$info['articlepath'] = $general['articlepath'];
	}
	if ( isset( $general['server'] ) ) {
		$info['server'] = $general['server'];
	}
	if ( !isset( $info['favicon'] ) && isset( $general['favicon'] ) ) {
		$info['favicon'] = $general['favicon'];
	}

	return $info;
}
531
/**
 * Perform an HTTP GET request with this repo's user agent and a Referer
 * header set to the canonical server.
 *
 * @param string $url Expanded with PROTO_HTTP before use
 * @param int|string|null $timeout Request timeout; null means 'default'
 * @param array $options Extra request options; 'timeout', 'method' and
 *  'userAgent' are overwritten here
 * @param int|false &$mtime Receives the UNIX time parsed from the
 *  Last-Modified response header, or false when the header is missing or
 *  the request did not succeed
 * @return string|false Response body, or false on failure (a warning is
 *  logged to the 'http' channel when the request itself fails)
 */
public function httpGet(
	$url, $timeout = 'default', $options = [], &$mtime = false
) {
	// Give the out-parameter a defined value on every exit path.
	$mtime = false;

	$urlUtils = MediaWikiServices::getInstance()->getUrlUtils();
	$requestFactory = MediaWikiServices::getInstance()->getHttpRequestFactory();

	$url = $urlUtils->expand( $url, PROTO_HTTP );
	wfDebug( "ForeignAPIRepo: HTTP GET: $url" );
	if ( !$url ) {
		return false;
	}

	$options['timeout'] = $timeout ?? 'default';
	$options['method'] = "GET";
	$options['userAgent'] = $this->getUserAgent();

	$req = $requestFactory->create( $url, $options, __METHOD__ );
	$req->setHeader( 'Referer', $urlUtils->getCanonicalServer() );
	$status = $req->execute();

	if ( !$status->isOK() ) {
		$logger = LoggerFactory::getInstance( 'http' );
		$logger->warning(
			$status->getWikiText( false, false, 'en' ),
			[ 'caller' => __METHOD__ ]
		);

		return false;
	}

	$lmod = $req->getResponseHeader( 'Last-Modified' );
	if ( $lmod ) {
		$mtime = (int)wfTimestamp( TS::UNIX, $lmod );
	}

	return $req->getContent();
}
578
/**
 * Build the 'iiprop' value used by the thumbnail queries.
 *
 * @return string The IMAGE_INFO_PROPS entries joined with '|'
 */
protected static function getIIProps() {
	$props = self::IMAGE_INFO_PROPS;

	return implode( '|', $props );
}
586
/**
 * HTTP GET request to a mediawiki API (with caching)
 *
 * The URL is built from the configured API base when there is one, and
 * from makeUrl( $query, 'api' ) otherwise. Failed requests are cached as
 * null.
 *
 * @param string $attribute Used in the cache key as "filerepo-$attribute"
 * @param array $query Query parameters
 * @param int $cacheTTL Cache time-to-live passed to the WAN cache
 * @return mixed The response body, or null for a cached failure
 */
public function httpGetCached( $attribute, $query, $cacheTTL = 3600 ) {
	$url = $this->mApiBase
		? wfAppendQuery( $this->mApiBase, $query )
		: $this->makeUrl( $query, 'api' );

	// Allow reusing the same cached data across wikis (T285271).
	// This does not use getSharedCacheKey() because caching here
	// is transparent to client wikis (which are not expected to issue purges).
	$cacheKey = $this->wanCache->makeGlobalKey( "filerepo-$attribute", sha1( $url ) );

	$fetch = function ( $curValue, &$ttl ) use ( $url ) {
		$body = $this->httpGet( $url, 'default', [], $mtime );
		// FIXME: This should use the mtime from the api response body
		// not the mtime from the last-modified header which usually is not set.
		if ( $body === false ) {
			$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );

			return null; // caches negatives
		}
		if ( $mtime ) {
			$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );
		}

		return $body;
	};

	return $this->wanCache->getWithSetCallback(
		$cacheKey,
		$cacheTTL,
		$fetch,
		[ 'pcGroup' => 'http-get:3', 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
623
/**
 * Enumerating files is not supported by this repo.
 *
 * @param callable $callback Unused
 * @throws RuntimeException Always
 */
public function enumFiles( $callback ): never {
	$message = 'enumFiles is not supported by ' . static::class;

	throw new RuntimeException( $message );
}
631
/**
 * Throw an exception if this repo is read-only by design.
 *
 * This implementation throws unconditionally.
 *
 * @throws LogicException Always
 */
protected function assertWritableRepo(): never {
	$message = static::class . ': write operations are not supported.';

	throw new LogicException( $message );
}
635}
636
638class_alias( ForeignAPIRepo::class, 'ForeignAPIRepo' );
const NS_FILE
Definition Defines.php:57
const PROTO_HTTP
Definition Defines.php:217
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfAppendQuery( $url, $query)
Append a query string to an existing URL, which may or may not already have query string parameters a...
makeTitle( $linkId)
Convert a link ID to a Title.to override Title
Base class for file repositories.
Definition FileRepo.php:51
string false $url
Public zone URL.
Definition FileRepo.php:115
getLocalCacheKey( $kClassSuffix,... $components)
Get a site-local, repository-qualified, WAN cache key.
getBackend()
Get the file backend instance.
Definition FileRepo.php:253
string false $thumbUrl
The base thumbnail URL.
Definition FileRepo.php:118
getHashPath( $name)
Get a relative path including trailing slash, e.g.
Definition FileRepo.php:747
getDisplayName()
Get the human-readable name of the repo.
makeUrl( $query='', $entry='index')
Make an url to this repo.
Definition FileRepo.php:808
validateFilename( $filename)
Determine if a relative path is valid, i.e.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:79
Foreign file accessible through api.php requests.
static getProps()
Get the property string for iiprop and aiprop.
A foreign repository for a remote MediaWiki accessible through api.php requests.
getThumbUrlFromCache( $name, $width, $height, $params="")
Return the imageurl from cache if possible.
int $apiMetadataExpiry
API metadata cache time.
int $fileCacheExpiry
Redownload thumbnail files after this expiry.
httpGet( $url, $timeout='default', $options=[], &$mtime=false)
assertWritableRepo()
Throw an exception if this repo is read-only by design.
newFile( $title, $time=false)
Per docs in FileRepo, this needs to return false if we don't support versioned files.
fetchImageQuery( $query)
Make an API query in the foreign repo, caching results.
httpGetCached( $attribute, $query, $cacheTTL=3600)
HTTP GET request to a mediawiki API (with caching)
getZonePath( $zone)
Get the local directory corresponding to one of the basic zones.
canCacheThumbs()
Are we locally caching the thumbnails?
getUserAgent()
The user agent the ForeignAPIRepo will use.
getInfo()
Get information about the repo - overrides/extends the parent class's information.
getThumbError( $name, $width=-1, $height=-1, $otherParams='', $lang=null)
int $apiThumbCacheExpiry
Check back with Commons after this expiry.
JSON formatter wrapper class.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const LocalFileRepo
Name constant for the LocalFileRepo setting, for use with Config::get()
const LanguageCode
Name constant for the LanguageCode setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Basic media transform error class.
Represents a title within MediaWiki.
Definition Title.php:69
Base class for all file backend classes (including multi-write backends).
prepare(array $params)
Prepare a storage directory for usage.
getFileTimestamp(array $params)
Get the last-modified timestamp of the file at a storage path.
fileExists(array $params)
Check if a file exists at a storage path in the backend.
quickCreate(array $params, array $opts=[])
Performs a single quick create operation.
static isStoragePath( $path)
Check if a given path is a "mwstore://" path.
Multi-datacenter aware caching interface.
A foreign repo that implements support for API queries.
Represents the target of a wiki link.
Interface for objects (potentially) representing an editable wiki page.