MediaWiki master
ForeignAPIRepo.php
Go to the documentation of this file.
1<?php
29
/**
 * This version string is used in the user agent for requests and will help
 * server maintainers to identify ForeignAPI usage.
 * Update the version every time you make breaking or significant changes.
 */
private const VERSION = '2.1';

/**
 * Image info properties requested from the foreign API; getIIProps() joins
 * them with '|' to build the 'iiprop' query parameter.
 */
private const IMAGE_INFO_PROPS = [
	'url',
	'timestamp',
];

/** @var callable Factory callback used to build file objects */
protected $fileFactory = [ ForeignAPIFile::class, 'newFromTitle' ];

/** @var int Check back with Commons after this expiry (seconds) */
protected $apiThumbCacheExpiry = 24 * 3600; // 1 day

/** @var int Redownload thumbnail files after this expiry (seconds) */
protected $fileCacheExpiry = 30 * 24 * 3600; // 1 month

/** @var int API metadata cache time (seconds) */
protected $apiMetadataExpiry = 4 * 3600; // 4 hours

/** @var array In-process cache of file existence, filled by fileExistsBatch() */
protected $mFileExists = [];

/** @var string|null URL of the foreign api.php, taken from $info['apibase'] */
private $mApiBase;
85
/**
 * Set up the repo from its configuration array.
 *
 * Besides the keys handled by the parent constructor, this reads:
 *  - apibase: URL of the foreign api.php
 *  - apiThumbCacheExpiry, fileCacheExpiry, apiMetadataExpiry: overrides for
 *    the default expiry values of the properties with the same names
 *
 * @param array|null $info Repo configuration
 */
public function __construct( $info ) {
	$localFileRepo = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::LocalFileRepo );
	parent::__construct( $info );

	// e.g. https://commons.wikimedia.org/w/api.php
	$this->mApiBase = $info['apibase'] ?? null;

	if ( isset( $info['apiThumbCacheExpiry'] ) ) {
		$this->apiThumbCacheExpiry = $info['apiThumbCacheExpiry'];
	}
	if ( isset( $info['fileCacheExpiry'] ) ) {
		$this->fileCacheExpiry = $info['fileCacheExpiry'];
	}
	if ( isset( $info['apiMetadataExpiry'] ) ) {
		$this->apiMetadataExpiry = $info['apiMetadataExpiry'];
	}

	if ( !$this->scriptDirUrl ) {
		// hack for description fetches.
		// The cast keeps dirname() from receiving null when no 'apibase' was
		// configured (passing null is deprecated since PHP 8.1); the result
		// is the same empty string as before.
		$this->scriptDirUrl = dirname( (string)$this->mApiBase );
	}

	// If we can cache thumbs we can guess sensible defaults for these
	if ( $this->canCacheThumbs() ) {
		if ( !$this->url ) {
			$this->url = $localFileRepo['url'];
		}
		if ( !$this->thumbUrl ) {
			$this->thumbUrl = $this->url . '/thumb';
		}
	}
}
118
/**
 * Per docs in FileRepo, this needs to return false if we don't support
 * versioned files. Any truthy $time is therefore rejected.
 *
 * @param mixed $title Passed through to the parent implementation
 * @param string|false $time Requested version timestamp, or false for current
 * @return mixed False for a versioned request, otherwise the parent's result
 */
public function newFile( $title, $time = false ) {
	return $time ? false : parent::newFile( $title, $time );
}
134
/**
 * Check whether each of the given files exists in the foreign repo.
 *
 * Answers already held in $this->mFileExists are reused. Virtual URLs and
 * mwstore:// storage paths are reported as not existing. Everything else is
 * resolved with a single imageinfo query, and the outcome is remembered in
 * $this->mFileExists.
 *
 * @param string[] $files Titles/paths to check; keys are preserved in the result
 * @return array Map of input key => bool (or null when the API response did
 *  not mention the title). Keys that had to be queried are absent from the
 *  result when the API request yields no page data.
 */
public function fileExistsBatch( array $files ) {
	$results = [];
	foreach ( $files as $k => $f ) {
		if ( isset( $this->mFileExists[$f] ) ) {
			$results[$k] = $this->mFileExists[$f];
			unset( $files[$k] );
		} elseif ( self::isVirtualUrl( $f ) ) {
			# @todo FIXME: We need to be able to handle virtual
			# URLs better, at least when we know they refer to the
			# same repo.
			$results[$k] = false;
			unset( $files[$k] );
		} elseif ( FileBackend::isStoragePath( $f ) ) {
			$results[$k] = false;
			unset( $files[$k] );
			wfWarn( "Got mwstore:// path '$f'." );
		}
	}

	if ( !$files ) {
		// Everything was answered locally; do not send a query with an
		// empty 'titles' parameter.
		return $results;
	}

	$data = $this->fetchImageQuery( [
		'titles' => implode( '|', $files ),
		'prop' => 'imageinfo',
	] );

	if ( isset( $data['query']['pages'] ) ) {
		# First, get results from the query. Note we only care whether the image exists,
		# not whether it has a description page.
		foreach ( $data['query']['pages'] as $p ) {
			if ( !isset( $p['title'] ) ) {
				continue;
			}
			# An entry without 'imagerepository' gives no evidence of a file,
			# so it is recorded as missing instead of reading an undefined index.
			$this->mFileExists[$p['title']] =
				isset( $p['imagerepository'] ) && $p['imagerepository'] !== '';
		}
		# Second, copy the results to any redirects that were queried
		foreach ( $data['query']['redirects'] ?? [] as $r ) {
			if ( isset( $this->mFileExists[$r['to']] ) ) {
				$this->mFileExists[$r['from']] = $this->mFileExists[$r['to']];
			}
		}
		# Third, copy the results to any non-normalized titles that were queried
		foreach ( $data['query']['normalized'] ?? [] as $n ) {
			if ( isset( $this->mFileExists[$n['to']] ) ) {
				$this->mFileExists[$n['from']] = $this->mFileExists[$n['to']];
			}
		}
		# Finally, copy the results to the output; titles the response did not
		# mention are reported as null (unknown).
		foreach ( $files as $key => $file ) {
			$results[$key] = $this->mFileExists[$file] ?? null;
		}
	}

	return $results;
}
189
/**
 * File properties are not available for this repo type, so an empty array
 * is always returned.
 *
 * @param string $virtualUrl Ignored
 * @return array Always empty
 */
public function getFileProps( $virtualUrl ) {
	return [];
}
197
/**
 * Make an API query in the foreign repo, caching results.
 *
 * 'format', 'action' and 'redirects' are always forced; 'uselang' falls back
 * to the wiki's configured language code when unset or null.
 *
 * @param array $query Query parameters
 * @return array|null Decoded JSON response, or null when no data was retrieved
 */
public function fetchImageQuery( $query ) {
	$forced = [
		'format' => 'json',
		'action' => 'query',
		'redirects' => 'true'
	];
	$query = array_merge( $query, $forced );

	// uselang is unset or null: use the configured site language
	$query['uselang'] = $query['uselang']
		?? MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::LanguageCode );

	$raw = $this->httpGetCached( 'Metadata', $query, $this->apiMetadataExpiry );

	return $raw ? FormatJson::decode( $raw, true ) : null;
}
227
/**
 * Extract the first imageinfo entry from an API query response.
 *
 * @param array|null $data Decoded response, as returned by fetchImageQuery()
 * @return array|false The first page's first imageinfo entry, with 'pageid'
 *  added when the page has one; false if no page carries imageinfo
 */
public function getImageInfo( $data ) {
	if ( !$data || !isset( $data['query']['pages'] ) ) {
		return false;
	}

	foreach ( $data['query']['pages'] as $page ) {
		if ( !isset( $page['imageinfo'][0] ) ) {
			continue;
		}
		$imageInfo = $page['imageinfo'][0];
		if ( isset( $page['pageid'] ) ) {
			$imageInfo['pageid'] = $page['pageid'];
		}

		return $imageInfo;
	}

	return false;
}
247
/**
 * Find files in the foreign repo by their base-36 SHA-1 hash, using the
 * 'allimages' list query.
 *
 * @param string $hash Base-36 SHA-1 hash
 * @return ForeignAPIFile[] One file object per named match; may be empty
 */
public function findBySha1( $hash ) {
	$response = $this->fetchImageQuery( [
		'aisha1base36' => $hash,
		'aiprop' => ForeignAPIFile::getProps(),
		'list' => 'allimages',
	] );

	$files = [];
	foreach ( $response['query']['allimages'] ?? [] as $img ) {
		// 1.14 was broken, doesn't return name attribute
		if ( isset( $img['name'] ) ) {
			$files[] = new ForeignAPIFile( Title::makeTitle( NS_FILE, $img['name'] ), $this, $img );
		}
	}

	return $files;
}
271
/**
 * Ask the foreign API for the URL of a thumbnail.
 *
 * @param string $name File name, without the 'File:' prefix
 * @param int $width Requested width (sent as iiurlwidth)
 * @param int $height Requested height (sent as iiurlheight)
 * @param array|null &$result Receives the image info array on success
 * @param string $otherParams Extra thumbnail parameters (sent as iiurlparam)
 * @return string|false Remote thumbnail URL, or false if none was returned
 */
private function getThumbUrl(
	$name, $width = -1, $height = -1, &$result = null, $otherParams = ''
) {
	$query = [
		'titles' => 'File:' . $name,
		'iiprop' => self::getIIProps(),
		'iiurlwidth' => $width,
		'iiurlheight' => $height,
		'iiurlparam' => $otherParams,
		'prop' => 'imageinfo',
	];
	$data = $this->fetchImageQuery( $query );
	$info = $this->getImageInfo( $data );

	if ( !$data || !$info || !isset( $info['thumburl'] ) ) {
		return false;
	}

	wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] );
	$result = $info;

	return $info['thumburl'];
}
302
/**
 * Ask the foreign API for the error it reports when scaling a thumbnail.
 *
 * @param string $name File name, without the 'File:' prefix
 * @param int $width Requested width (sent as iiurlwidth)
 * @param int $height Requested height (sent as iiurlheight)
 * @param string $otherParams Extra thumbnail parameters (sent as iiurlparam)
 * @param string|null $lang Language for the error; null lets
 *  fetchImageQuery() pick its default
 * @return MediaTransformError|false The remote error, or false if the
 *  response carries no 'thumberror'
 */
public function getThumbError(
	$name, $width = -1, $height = -1, $otherParams = '', $lang = null
) {
	$query = [
		'titles' => 'File:' . $name,
		'iiprop' => self::getIIProps(),
		'iiurlwidth' => $width,
		'iiurlheight' => $height,
		'iiurlparam' => $otherParams,
		'prop' => 'imageinfo',
		'uselang' => $lang,
	];
	$data = $this->fetchImageQuery( $query );
	$info = $this->getImageInfo( $data );

	if ( !$data || !$info || !isset( $info['thumberror'] ) ) {
		return false;
	}

	wfDebug( __METHOD__ . " got remote thumb error " . $info['thumberror'] );

	return new MediaTransformError(
		'thumbnail_error_remote',
		$width,
		$height,
		$this->getDisplayName(),
		$info['thumberror'] // already parsed message from foreign repo
	);
}
340
/**
 * Return the imageurl from cache if possible.
 *
 * When thumbnail caching is disabled this returns the remote thumbnail URL
 * directly. Otherwise it looks the size up in the WAN cache, then checks for
 * an already downloaded copy in the thumb zone, and finally downloads the
 * remote thumbnail into the local file backend.
 *
 * @param string $name File name
 * @param int $width Requested width
 * @param int $height Requested height
 * @param string $params Other rendering parameters (passed as iiurlparam)
 * @return string|false Local URL of the cached thumbnail; the foreign URL if
 *  the thumbnail could not be written locally; false if no thumbnail URL
 *  could be determined or the download failed
 */
public function getThumbUrlFromCache( $name, $width, $height, $params = "" ) {
	if ( !$this->canCacheThumbs() ) {
		$result = null; // can't pass "null" by reference, but it's ok as default value

		return $this->getThumbUrl( $name, $width, $height, $result, $params );
	}

	// We can't check the local cache using FileRepo functions because
	// we override fileExistsBatch(). We have to use the FileBackend directly.
	$backend = $this->getBackend();

	$key = $this->getLocalCacheKey( 'file-thumb-url', sha1( $name ) );
	$sizekey = "$width:$height:$params";

	/* Get the array of urls that we already know */
	$knownThumbUrls = $this->wanCache->get( $key );
	if ( !$knownThumbUrls ) {
		/* No knownThumbUrls for this file */
		$knownThumbUrls = [];
	} elseif ( isset( $knownThumbUrls[$sizekey] ) ) {
		wfDebug( __METHOD__ . ': Got thumburl from local cache: ' .
			"{$knownThumbUrls[$sizekey]}" );

		return $knownThumbUrls[$sizekey];
	}

	$metadata = null;
	$foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata, $params );

	if ( !$foreignUrl ) {
		wfDebug( __METHOD__ . " Could not find thumburl" );

		return false;
	}

	// We need the same filename as the remote one :)
	$fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
	if ( !$this->validateFilename( $fileName ) ) {
		wfDebug( __METHOD__ . " The deduced filename $fileName is not safe" );

		return false;
	}
	$localPath = $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
	$localFilename = $localPath . "/" . $fileName;
	$localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) .
		rawurlencode( $name ) . "/" . rawurlencode( $fileName );

	if ( $backend->fileExists( [ 'src' => $localFilename ] )
		&& isset( $metadata['timestamp'] )
	) {
		wfDebug( __METHOD__ . " Thumbnail was already downloaded before" );
		$modified = (int)wfTimestamp( TS_UNIX, $backend->getFileTimestamp( [ 'src' => $localFilename ] ) );
		$remoteModified = (int)wfTimestamp( TS_UNIX, $metadata['timestamp'] );
		$current = (int)wfTimestamp( TS_UNIX );
		$diff = abs( $modified - $current );
		if ( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
			/* Use our current and already downloaded thumbnail */
			$knownThumbUrls[$sizekey] = $localUrl;
			$this->wanCache->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );

			return $localUrl;
		}
		/* There is a new Commons file, or existing thumbnail older than a month */
	}

	$mtime = false;
	$thumb = self::httpGet( $foreignUrl, 'default', [], $mtime );
	if ( !$thumb ) {
		wfDebug( __METHOD__ . " Could not download thumb" );

		return false;
	}

	# @todo FIXME: Delete old thumbs that aren't being used. Maintenance script?
	$backend->prepare( [ 'dir' => dirname( $localFilename ) ] );
	// Separate variable so the $params argument is not overwritten
	$createParams = [ 'dst' => $localFilename, 'content' => $thumb ];
	if ( !$backend->quickCreate( $createParams )->isOK() ) {
		wfDebug( __METHOD__ . " could not write to thumb path '$localFilename'" );

		return $foreignUrl;
	}
	$knownThumbUrls[$sizekey] = $localUrl;

	// Without a Last-Modified time, fall back to the plain thumb cache expiry
	$ttl = $mtime
		? $this->wanCache->adaptiveTTL( $mtime, $this->apiThumbCacheExpiry )
		: $this->apiThumbCacheExpiry;
	$this->wanCache->set( $key, $knownThumbUrls, $ttl );
	wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache" );

	return $localUrl;
}
444
/**
 * Get the URL for a zone: the configured public URL for 'public', the
 * thumbnail URL for 'thumb', and the parent's answer for anything else.
 *
 * @param string $zone Zone name
 * @param string|null $ext Optional file extension, forwarded to the parent
 * @return mixed Zone URL
 */
public function getZoneUrl( $zone, $ext = null ) {
	// Loose comparisons on purpose: they match the semantics of a switch
	if ( $zone == 'public' ) {
		return $this->url;
	}
	if ( $zone == 'thumb' ) {
		return $this->thumbUrl;
	}

	return parent::getZoneUrl( $zone, $ext );
}
461
/**
 * Get the local directory corresponding to one of the basic zones.
 * Only the 'public' and 'thumb' zones are supported.
 *
 * @param string $zone Zone name
 * @return mixed The parent's zone path, or false for an unsupported zone
 */
public function getZonePath( $zone ) {
	$isSupported = in_array( $zone, [ 'public', 'thumb' ] );

	return $isSupported ? parent::getZonePath( $zone ) : false;
}
475
/**
 * Are we locally caching the thumbnails?
 *
 * @return bool True when the thumb cache expiry is positive
 */
public function canCacheThumbs() {
	return $this->apiThumbCacheExpiry > 0;
}
483
/**
 * The user agent the ForeignAPIRepo will use: the HTTP request factory's
 * user agent followed by " ForeignAPIRepo/" and the class VERSION.
 *
 * @return string
 */
public static function getUserAgent() {
	$baseAgent = MediaWikiServices::getInstance()->getHttpRequestFactory()->getUserAgent();

	return $baseAgent . " ForeignAPIRepo/" . self::VERSION;
}
492
/**
 * Get information about the repo - overrides/extends the parent class's information.
 *
 * Adds 'apiurl' and, when the foreign wiki's general siteinfo can be fetched
 * and decoded, 'articlepath', 'server' and (unless the parent already set
 * one) 'favicon'.
 *
 * @return array
 */
public function getInfo() {
	$info = parent::getInfo();
	$info['apiurl'] = $this->mApiBase;

	$query = [
		'format' => 'json',
		'action' => 'query',
		'meta' => 'siteinfo',
		'siprop' => 'general',
	];

	$data = $this->httpGetCached( 'SiteInfo', $query, 7200 );
	if ( !$data ) {
		return $info;
	}

	$siteInfo = FormatJson::decode( $data, true );
	// The response may be undecodable or lack the expected structure (for
	// example an API error body); only copy what is actually present.
	$general = $siteInfo['query']['general'] ?? null;
	if ( !is_array( $general ) ) {
		return $info;
	}

	foreach ( [ 'articlepath', 'server' ] as $field ) {
		if ( isset( $general[$field] ) ) {
			$info[$field] = $general[$field];
		}
	}
	if ( !isset( $info['favicon'] ) && isset( $general['favicon'] ) ) {
		$info['favicon'] = $general['favicon'];
	}

	return $info;
}
525
/**
 * Perform an HTTP GET request with the ForeignAPIRepo user agent.
 *
 * @param string $url URL to fetch; expanded with PROTO_HTTP before use
 * @param string|int|null $timeout Request timeout; null is replaced by 'default'
 * @param array $options Extra request options; 'timeout', 'method' and
 *  'userAgent' are overwritten
 * @param int|false &$mtime On success, receives the Unix time parsed from the
 *  Last-Modified response header, or false if the header is absent
 * @return string|false Response body, or false if the URL could not be
 *  expanded or the request failed (failures are logged to the 'http' channel)
 */
public static function httpGet(
	$url, $timeout = 'default', $options = [], &$mtime = false
) {
	$options['timeout'] = $timeout ?? 'default';

	$url = MediaWikiServices::getInstance()->getUrlUtils()
		->expand( $url, PROTO_HTTP );
	wfDebug( "ForeignAPIRepo: HTTP GET: $url" );
	if ( !$url ) {
		return false;
	}

	$options['method'] = "GET";
	$options['userAgent'] = self::getUserAgent();

	$request = MediaWikiServices::getInstance()->getHttpRequestFactory()
		->create( $url, $options, __METHOD__ );
	$status = $request->execute();

	if ( !$status->isOK() ) {
		LoggerFactory::getInstance( 'http' )->warning(
			$status->getWikiText( false, false, 'en' ),
			[ 'caller' => 'ForeignAPIRepo::httpGet' ]
		);

		return false;
	}

	$lastModified = $request->getResponseHeader( 'Last-Modified' );
	$mtime = $lastModified ? (int)wfTimestamp( TS_UNIX, $lastModified ) : false;

	return $request->getContent();
}
570
/**
 * Build the 'iiprop' parameter value: the entries of IMAGE_INFO_PROPS
 * joined with '|'.
 *
 * @return string
 */
protected static function getIIProps() {
	$props = self::IMAGE_INFO_PROPS;

	return implode( '|', $props );
}
578
/**
 * HTTP GET request to a mediawiki API (with caching)
 *
 * The request goes to the configured API base when there is one, otherwise
 * to the URL built by makeUrl() for the 'api' entry point.
 *
 * @param string $attribute Used in the cache key ("filerepo-$attribute")
 * @param array $query Query parameters
 * @param int $cacheTTL Upper bound for the cache lifetime, in seconds
 * @return string|null Response body; null when the request failed
 */
public function httpGetCached( $attribute, $query, $cacheTTL = 3600 ) {
	$url = $this->mApiBase
		? wfAppendQuery( $this->mApiBase, $query )
		: $this->makeUrl( $query, 'api' );

	// Allow reusing the same cached data across wikis (T285271).
	// This does not use getSharedCacheKey() because caching here
	// is transparent to client wikis (which are not expected to issue purges).
	$cacheKey = $this->wanCache->makeGlobalKey( "filerepo-$attribute", sha1( $url ) );

	return $this->wanCache->getWithSetCallback(
		$cacheKey,
		$cacheTTL,
		function ( $curValue, &$ttl ) use ( $url ) {
			$body = self::httpGet( $url, 'default', [], $mtime );
			// FIXME: This should use the mtime from the api response body
			// not the mtime from the last-modified header which usually is not set.
			if ( $body === false ) {
				// Failed fetch: $mtime was never set, so the TTL is whatever
				// adaptiveTTL() yields for a missing time. Caches negatives.
				$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );

				return null;
			}
			if ( $mtime ) {
				$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );
			}

			return $body;
		},
		[ 'pcGroup' => 'http-get:3', 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
615
/**
 * Enumerating files is not possible for this repo type.
 *
 * @param callable $callback Unused
 * @throws RuntimeException Always
 */
public function enumFiles( $callback ) {
	$message = 'enumFiles is not supported by ' . static::class;

	throw new RuntimeException( $message );
}
623
/**
 * This repo type is read-only, so the assertion always fails.
 *
 * @throws LogicException Always
 */
protected function assertWritableRepo() {
	$message = static::class . ': write operations are not supported.';

	throw new LogicException( $message );
}
630}
const NS_FILE
Definition Defines.php:71
const PROTO_HTTP
Definition Defines.php:204
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfAppendQuery( $url, $query)
Append a query string to an existing URL, which may or may not already have query string parameters a...
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
array $params
The job parameters.
Base class for file repositories.
Definition FileRepo.php:52
getDisplayName()
Get the human-readable name of the repo.
getLocalCacheKey( $kClassSuffix,... $components)
Get a site-local, repository-qualified, WAN cache key.
makeUrl( $query='', $entry='index')
Make a URL to this repo.
Definition FileRepo.php:809
FileBackend $backend
Definition FileRepo.php:75
string false $url
Public zone URL.
Definition FileRepo.php:116
validateFilename( $filename)
Determine if a relative path is valid, i.e.
string $name
Definition FileRepo.php:166
string false $thumbUrl
The base thumbnail URL.
Definition FileRepo.php:119
getHashPath( $name)
Get a relative path including trailing slash, e.g.
Definition FileRepo.php:748
getBackend()
Get the file backend instance.
Definition FileRepo.php:254
Foreign file accessible through api.php requests.
static getProps()
Get the property string for iiprop and aiprop.
A foreign repository for a remote MediaWiki accessible through api.php requests.
int $fileCacheExpiry
Redownload thumbnail files after this expiry.
newFile( $title, $time=false)
Per docs in FileRepo, this needs to return false if we don't support versioned files.
int $apiMetadataExpiry
API metadata cache time.
static httpGet( $url, $timeout='default', $options=[], &$mtime=false)
enumFiles( $callback)
getInfo()
Get information about the repo - overrides/extends the parent class's information.
static getUserAgent()
The user agent the ForeignAPIRepo will use.
fetchImageQuery( $query)
Make an API query in the foreign repo, caching results.
getThumbError( $name, $width=-1, $height=-1, $otherParams='', $lang=null)
canCacheThumbs()
Are we locally caching the thumbnails?
int $apiThumbCacheExpiry
Check back with Commons after this expiry.
getFileProps( $virtualUrl)
getThumbUrlFromCache( $name, $width, $height, $params="")
Return the imageurl from cache if possible.
fileExistsBatch(array $files)
getZoneUrl( $zone, $ext=null)
getZonePath( $zone)
Get the local directory corresponding to one of the basic zones.
httpGetCached( $attribute, $query, $cacheTTL=3600)
HTTP GET request to a mediawiki API (with caching)
Basic media transform error class.
JSON formatter wrapper class.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Represents a title within MediaWiki.
Definition Title.php:79
Base class for all file backend classes (including multi-write backends).
prepare(array $params)
Prepare a storage directory for usage.
getFileTimestamp(array $params)
Get the last-modified timestamp of the file at a storage path.
fileExists(array $params)
Check if a file exists at a storage path in the backend.
quickCreate(array $params, array $opts=[])
Performs a single quick create operation.
A foreign repo that implements support for API queries.
Represents the target of a wiki link.
Interface for objects (potentially) representing an editable wiki page.