Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
87.77% covered (warning)
87.77%
201 / 229
42.86% covered (danger)
42.86%
6 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiDetailRetriever
87.77% covered (warning)
87.77%
201 / 229
42.86% covered (danger)
42.86%
6 / 14
65.36
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
2
 sendApiRequest
57.14% covered (warning)
57.14%
4 / 7
0.00% covered (danger)
0.00%
0 / 1
2.31
 getImportDetails
98.21% covered (success)
98.21%
55 / 56
0.00% covered (danger)
0.00%
0 / 1
8
 reduceTitleList
81.82% covered (warning)
81.82%
9 / 11
0.00% covered (danger)
0.00%
0 / 1
1.01
 getMoreRevisions
79.31% covered (warning)
79.31%
23 / 29
0.00% covered (danger)
0.00%
0 / 1
9.72
 checkRevisionCount
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
5
 checkMaxRevisionAggregatedBytes
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 getFileRevisionsFromImageInfo
65.00% covered (warning)
65.00%
13 / 20
0.00% covered (danger)
0.00%
0 / 1
10.74
 getTextRevisionsFromRevisionsInfo
61.11% covered (warning)
61.11%
11 / 18
0.00% covered (danger)
0.00%
0 / 1
9.88
 getBaseParams
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
1
 addTextRevisionsToParams
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
3
 addFileRevisionsToParams
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
3
 addTemplatesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 addCategoriesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use ConfigException;
6use FileImporter\Data\FileRevision;
7use FileImporter\Data\FileRevisions;
8use FileImporter\Data\ImportDetails;
9use FileImporter\Data\SourceUrl;
10use FileImporter\Data\TextRevision;
11use FileImporter\Data\TextRevisions;
12use FileImporter\Exceptions\HttpRequestException;
13use FileImporter\Exceptions\ImportException;
14use FileImporter\Exceptions\LocalizedImportException;
15use FileImporter\Interfaces\DetailRetriever;
16use FileImporter\Services\Http\HttpRequestExecutor;
17use MediaWiki\MediaWikiServices;
18use Psr\Log\LoggerInterface;
19use Psr\Log\NullLogger;
20use TitleValue;
21
22/**
23 * @license GPL-2.0-or-later
24 * @author Addshore
25 */
26class ApiDetailRetriever implements DetailRetriever {
27    use MediaWikiSourceUrlParser;
28
29    /** @var HttpApiLookup */
30    private $httpApiLookup;
31    /** @var HttpRequestExecutor */
32    private $httpRequestExecutor;
33    /** @var int */
34    private $maxBytes;
35    /** @var LoggerInterface */
36    private $logger;
37    /**
38     * @var string Placeholder name replacing usernames that have been suppressed as part of
39     * a steward action on the source site.
40     */
41    private $suppressedUsername;
42    /** @var int */
43    private $maxRevisions;
44    /** @var int */
45    private $maxAggregatedBytes;
46
47    private const API_RESULT_LIMIT = 500;
48    private const MAX_REVISIONS = 100;
49    private const MAX_AGGREGATED_BYTES = 250000000;
50
51    /**
52     * @param HttpApiLookup $httpApiLookup
53     * @param HttpRequestExecutor $httpRequestExecutor
54     * @param int $maxBytes
55     * @param LoggerInterface|null $logger
56     *
57     * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
58     */
59    public function __construct(
60        HttpApiLookup $httpApiLookup,
61        HttpRequestExecutor $httpRequestExecutor,
62        $maxBytes,
63        LoggerInterface $logger = null
64    ) {
65        $this->httpApiLookup = $httpApiLookup;
66        $this->httpRequestExecutor = $httpRequestExecutor;
67        $this->maxBytes = $maxBytes;
68        $this->logger = $logger ?? new NullLogger();
69
70        $config = MediaWikiServices::getInstance()->getMainConfig();
71
72        $this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
73        $this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
74        $this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );
75        if ( !MediaWikiServices::getInstance()->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
76            throw new ConfigException(
77                'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
78                $this->suppressedUsername . '"'
79            );
80        }
81    }
82
83    /**
84     * @param SourceUrl $sourceUrl
85     * @param array $apiParameters
86     *
87     * @return array[]
88     * @throws ImportException when the request failed
89     */
90    private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ) {
91        $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
92
93        try {
94            $imageInfoRequest = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
95        } catch ( HttpRequestException $e ) {
96            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
97                $apiUrl ], $e );
98        }
99        $requestData = json_decode( $imageInfoRequest->getContent(), true );
100        return $requestData;
101    }
102
103    /**
104     * @param SourceUrl $sourceUrl
105     *
106     * @return ImportDetails
107     * @throws ImportException e.g. when the file couldn't be found
108     */
109    public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
110        $params = $this->getBaseParams( $sourceUrl );
111        $params = $this->addFileRevisionsToParams( $params );
112        $params = $this->addTextRevisionsToParams( $params );
113        $params = $this->addTemplatesToParams( $params );
114        $params = $this->addCategoriesToParams( $params );
115
116        $requestData = $this->sendApiRequest( $sourceUrl, $params );
117
118        if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) {
119            $this->logger->warning(
120                'No pages returned by the API',
121                [
122                    'sourceUrl' => $sourceUrl->getUrl(),
123                    'apiParameters' => $params,
124                ]
125            );
126            throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
127        }
128
129        /** @var array $pageInfoData */
130        $pageInfoData = end( $requestData['query']['pages'] );
131        '@phan-var array $pageInfoData';
132
133        if ( array_key_exists( 'missing', $pageInfoData ) ) {
134            if (
135                array_key_exists( 'imagerepository', $pageInfoData ) &&
136                $pageInfoData['imagerepository'] == 'shared'
137            ) {
138                throw new LocalizedImportException(
139                    [ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
140                );
141            }
142            throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
143        }
144
145        if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
146            $this->logger->warning(
147                'Bad image or revision info returned by the API',
148                [
149                    'sourceUrl' => $sourceUrl->getUrl(),
150                    'apiParameters' => $params,
151                ]
152            );
153            throw new LocalizedImportException( 'fileimporter-api-badinfo' );
154        }
155
156        // FIXME: Isn't this misplaced here, *before* more revisions are fetched?
157        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
158        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
159
160        while ( array_key_exists( 'continue', $requestData ) ) {
161            $this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
162        }
163
164        $pageTitle = $pageInfoData['title'];
165        $pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;
166
167        // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
168        $imageInfoData = $pageInfoData['imageinfo'];
169        $revisionsData = $pageInfoData['revisions'];
170        $fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
171        $textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
172        $templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
173        $categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );
174
175        $splitTitle = explode( ':', $pageInfoData['title'] );
176        $titleAfterColon = end( $splitTitle );
177
178        $importDetails = new ImportDetails(
179            $sourceUrl,
180            new TitleValue( NS_FILE, $titleAfterColon ),
181            $textRevisions,
182            $fileRevisions
183        );
184        // FIXME: Better use constructor parameters instead of setters?
185        $importDetails->setPageLanguage( $pageLanguage );
186        $importDetails->setTemplates( $templates );
187        $importDetails->setCategories( $categories );
188
189        return $importDetails;
190    }
191
192    /**
193     * @param array[] $titles
194     * @param int $namespace
195     *
196     * @return string[]
197     */
198    private function reduceTitleList( array $titles, int $namespace ): array {
199        return array_map(
200            static function ( array $title ): string {
201                return $title['title'];
202            },
203            array_filter(
204                $titles,
205                static function ( array $title ) use ( $namespace ): bool {
206                    return $title['ns'] === $namespace;
207                }
208            )
209        );
210    }
211
212    /**
213     * Fetches the next set of revisions unless the number of revisions
214     * exceeds the max revisions limit
215     *
216     * @param SourceUrl $sourceUrl
217     * @param array[] &$requestData
218     * @param array[] &$pageInfoData
219     *
220     * @throws ImportException
221     */
222    private function getMoreRevisions(
223        SourceUrl $sourceUrl,
224        array &$requestData,
225        array &$pageInfoData
226    ): void {
227        $rvContinue = $requestData['continue']['rvcontinue'] ?? null;
228        $iiStart = $requestData['continue']['iistart'] ?? null;
229        $tlContinue = $requestData['continue']['tlcontinue'] ?? null;
230        $clContinue = $requestData['continue']['clcontinue'] ?? null;
231
232        $params = $this->getBaseParams( $sourceUrl );
233
234        if ( $iiStart ) {
235            $params = $this->addFileRevisionsToParams( $params, $iiStart );
236        }
237
238        if ( $rvContinue ) {
239            $params = $this->addTextRevisionsToParams( $params, $rvContinue );
240        }
241
242        if ( $tlContinue ) {
243            $params = $this->addTemplatesToParams( $params, $tlContinue );
244        }
245
246        if ( $clContinue ) {
247            $params = $this->addCategoriesToParams( $params, $clContinue );
248        }
249
250        $requestData = $this->sendApiRequest( $sourceUrl, $params );
251
252        $newPageInfoData = end( $requestData['query']['pages'] );
253
254        if ( array_key_exists( 'revisions', $newPageInfoData ) ) {
255            $pageInfoData['revisions'] =
256                array_merge( $pageInfoData['revisions'], $newPageInfoData['revisions'] );
257        }
258
259        if ( array_key_exists( 'imageinfo', $newPageInfoData ) ) {
260            $pageInfoData['imageinfo'] =
261                array_merge( $pageInfoData['imageinfo'], $newPageInfoData['imageinfo'] );
262        }
263
264        if ( array_key_exists( 'templates', $newPageInfoData ) ) {
265            $pageInfoData['templates'] =
266                array_merge( $pageInfoData['templates'], $newPageInfoData['templates'] );
267        }
268
269        if ( array_key_exists( 'categories', $newPageInfoData ) ) {
270            $pageInfoData['categories'] =
271                array_merge( $pageInfoData['categories'], $newPageInfoData['categories'] );
272        }
273
274        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
275        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
276    }
277
278    /**
279     * Throws an exception if the number of revisions to be imported exceeds
280     * the maximum revision limit
281     *
282     * @param SourceUrl $sourceUrl
283     * @param array[] $pageInfoData
284     *
285     * @throws ImportException when exceeding the acceptable maximum
286     */
287    private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void {
288        if ( count( $pageInfoData['revisions'] ) > $this->maxRevisions ||
289            count( $pageInfoData['imageinfo'] ) > $this->maxRevisions ||
290            count( $pageInfoData['revisions'] ) > static::MAX_REVISIONS ||
291            count( $pageInfoData['imageinfo'] ) > static::MAX_REVISIONS ) {
292            $this->logger->warning(
293                'Too many revisions were being fetched',
294                [
295                    'sourceUrl' => $sourceUrl->getUrl(),
296                ]
297            );
298
299            throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
300        }
301    }
302
303    /**
304     * @param array[] $pageInfoData
305     * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
306     *
307     * @throws ImportException when exceeding the maximum file size
308     */
309    private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void {
310        $aggregatedFileBytes = 0;
311        foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
312            $aggregatedFileBytes += $fileVersion['size'] ?? 0;
313            if ( $aggregatedFileBytes > $this->maxAggregatedBytes ||
314                $aggregatedFileBytes > static::MAX_AGGREGATED_BYTES ) {
315                $versions = count( $pageInfoData['imageinfo'] );
316                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
317            }
318        }
319    }
320
321    /**
322     * @param array[] $imageInfo
323     * @param string $pageTitle
324     *
325     * @return FileRevisions
326     * @throws ImportException when the file is not acceptable, e.g. hidden or to big
327     */
328    private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions {
329        $revisions = [];
330        foreach ( $imageInfo as $revisionInfo ) {
331            if ( array_key_exists( 'filehidden', $revisionInfo ) ) {
332                throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
333            }
334
335            if ( array_key_exists( 'filemissing', $revisionInfo ) ) {
336                throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
337            }
338
339            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
340                $revisionInfo['user'] = $this->suppressedUsername;
341            }
342
343            if ( ( $revisionInfo['size'] ?? 0 ) > $this->maxBytes ) {
344                $versions = count( $imageInfo );
345                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
346            }
347
348            if ( isset( $revisionInfo['sha1'] ) ) {
349                // Convert from API sha1 format to DB sha1 format. The conversion can be se inside
350                // ApiQueryImageInfo.
351                // * API sha1 format is base 16 padded to 40 chars
352                // * DB sha1 format is base 36 padded to 31 chars
353                $revisionInfo['sha1'] = \Wikimedia\base_convert( $revisionInfo['sha1'], 16, 36, 31 );
354            }
355
356            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
357                $revisionInfo['comment'] = wfMessage( 'fileimporter-revision-removed-comment' )
358                    ->plain();
359            }
360
361            $revisionInfo['name'] = $pageTitle;
362            $revisionInfo['description'] = $revisionInfo['comment'] ?? null;
363
364            $revisions[] = new FileRevision( $revisionInfo );
365        }
366        return new FileRevisions( $revisions );
367    }
368
369    /**
370     * @param array[] $revisionsInfo
371     * @param string $pageTitle
372     *
373     * @return TextRevisions
374     */
375    private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions {
376        $revisions = [];
377        foreach ( $revisionsInfo as $revisionInfo ) {
378            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
379                $revisionInfo['user'] = $this->suppressedUsername;
380            }
381
382            if ( array_key_exists( 'texthidden', $revisionInfo ) ) {
383                $revisionInfo['*'] = wfMessage( 'fileimporter-revision-removed-text' )
384                    ->plain();
385            }
386
387            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
388                $revisionInfo['comment'] = wfMessage( 'fileimporter-revision-removed-comment' )
389                    ->plain();
390            }
391
392            if ( !array_key_exists( 'contentmodel', $revisionInfo ) ) {
393                $revisionInfo['contentmodel'] = CONTENT_MODEL_WIKITEXT;
394            }
395
396            if ( !array_key_exists( 'contentformat', $revisionInfo ) ) {
397                $revisionInfo['contentformat'] = CONTENT_FORMAT_WIKITEXT;
398            }
399
400            $revisionInfo['minor'] = array_key_exists( 'minor', $revisionInfo );
401            $revisionInfo['title'] = $pageTitle;
402            $revisions[] = new TextRevision( $revisionInfo );
403        }
404        return new TextRevisions( $revisions );
405    }
406
407    /**
408     * @param SourceUrl $sourceUrl
409     * @return string[]
410     */
411    private function getBaseParams( SourceUrl $sourceUrl ): array {
412        return [
413            'action' => 'query',
414            'errorformat' => 'plaintext',
415            'format' => 'json',
416            'titles' => $this->parseTitleFromSourceUrl( $sourceUrl ),
417            'prop' => 'info'
418        ];
419    }
420
421    /**
422     * Adds to params base the properties for getting Text Revisions
423     *
424     * @param array $params
425     * @param string|null $rvContinue
426     *
427     * @return array
428     */
429    private function addTextRevisionsToParams( array $params, string $rvContinue = null ): array {
430        $params['prop'] .= ( $params['prop'] ) ? '|revisions' : 'revisions';
431
432        if ( $rvContinue ) {
433            $params['rvcontinue'] = $rvContinue;
434        }
435
436        return $params + [
437            'rvlimit' => static::API_RESULT_LIMIT,
438            'rvdir' => 'newer',
439            'rvprop' => implode(
440                '|',
441                [
442                    'flags',
443                    'timestamp',
444                    'user',
445                    'sha1',
446                    'contentmodel',
447                    'comment',
448                    'content',
449                    'tags',
450                ]
451            )
452        ];
453    }
454
455    /**
456     * Adds to params base the properties for getting File Revisions
457     *
458     * @param array $params
459     * @param string|null $iiStart
460     *
461     * @return array
462     */
463    private function addFileRevisionsToParams( array $params, string $iiStart = null ): array {
464        $params['prop'] .= ( $params['prop'] ) ? '|imageinfo' : 'imageinfo';
465
466        if ( $iiStart ) {
467            $params['iistart'] = $iiStart;
468        }
469
470        return $params + [
471            'iilimit' => static::API_RESULT_LIMIT,
472            'iiurlwidth' => 800,
473            'iiurlheight' => 400,
474            'iiprop' => implode(
475                '|',
476                [
477                    'timestamp',
478                    'user',
479                    'userid',
480                    'comment',
481                    'canonicaltitle',
482                    'url',
483                    'size',
484                    'sha1',
485                    'archivename',
486                ]
487            )
488        ];
489    }
490
491    /**
492     * Adds to params base the properties for getting Templates
493     *
494     * @param array $params
495     * @param string|null $tlContinue
496     *
497     * @return array
498     */
499    private function addTemplatesToParams( array $params, string $tlContinue = null ): array {
500        $params['prop'] .= ( $params['prop'] ) ? '|templates' : 'templates';
501
502        if ( $tlContinue ) {
503            $params['tlcontinue'] = $tlContinue;
504        }
505
506        return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => static::API_RESULT_LIMIT ];
507    }
508
509    /**
510     * Adds to params base the properties for getting Categories
511     *
512     * @param array $params
513     * @param string|null $clContinue
514     *
515     * @return array
516     */
517    private function addCategoriesToParams( array $params, string $clContinue = null ): array {
518        $params['prop'] .= ( $params['prop'] ) ? '|categories' : 'categories';
519
520        if ( $clContinue ) {
521            $params['clcontinue'] = $clContinue;
522        }
523
524        return $params + [ 'cllimit' => static::API_RESULT_LIMIT ];
525    }
526
527}