Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
88.18% covered (warning)
88.18%
194 / 220
42.86% covered (danger)
42.86%
6 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiDetailRetriever
88.18% covered (warning)
88.18%
194 / 220
42.86% covered (danger)
42.86%
6 / 14
63.55
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
2
 sendApiRequest
57.14% covered (warning)
57.14%
4 / 7
0.00% covered (danger)
0.00%
0 / 1
2.31
 getImportDetails
98.18% covered (success)
98.18%
54 / 55
0.00% covered (danger)
0.00%
0 / 1
7
 reduceTitleList
60.00% covered (warning)
60.00%
3 / 5
0.00% covered (danger)
0.00%
0 / 1
3.58
 getMoreRevisions
79.31% covered (warning)
79.31%
23 / 29
0.00% covered (danger)
0.00%
0 / 1
9.72
 checkRevisionCount
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
5
 checkMaxRevisionAggregatedBytes
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 getFileRevisionsFromImageInfo
65.00% covered (warning)
65.00%
13 / 20
0.00% covered (danger)
0.00%
0 / 1
10.74
 getTextRevisionsFromRevisionsInfo
64.29% covered (warning)
64.29%
9 / 14
0.00% covered (danger)
0.00%
0 / 1
6.14
 getBaseParams
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 addTextRevisionsToParams
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
3
 addFileRevisionsToParams
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
3
 addTemplatesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 addCategoriesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use FileImporter\Data\FileRevision;
6use FileImporter\Data\FileRevisions;
7use FileImporter\Data\ImportDetails;
8use FileImporter\Data\SourceUrl;
9use FileImporter\Data\TextRevision;
10use FileImporter\Data\TextRevisions;
11use FileImporter\Exceptions\HttpRequestException;
12use FileImporter\Exceptions\ImportException;
13use FileImporter\Exceptions\LocalizedImportException;
14use FileImporter\Interfaces\DetailRetriever;
15use FileImporter\Services\Http\HttpRequestExecutor;
16use MediaWiki\Config\ConfigException;
17use MediaWiki\MediaWikiServices;
18use MediaWiki\Revision\SlotRecord;
19use MediaWiki\Title\TitleValue;
20use Psr\Log\LoggerInterface;
21use Psr\Log\NullLogger;
22
23/**
24 * @license GPL-2.0-or-later
25 * @author Addshore
26 */
27class ApiDetailRetriever implements DetailRetriever {
28    use MediaWikiSourceUrlParser;
29
30    private HttpApiLookup $httpApiLookup;
31    private HttpRequestExecutor $httpRequestExecutor;
32    private int $maxBytes;
33    private LoggerInterface $logger;
34    /**
35     * @var string Placeholder name replacing usernames that have been suppressed as part of
36     * a steward action on the source site.
37     */
38    private $suppressedUsername;
39    private int $maxRevisions;
40    private int $maxAggregatedBytes;
41
42    private const API_RESULT_LIMIT = 500;
43    private const MAX_REVISIONS = 100;
44    private const MAX_AGGREGATED_BYTES = 250000000;
45
46    /**
47     * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
48     */
49    public function __construct(
50        HttpApiLookup $httpApiLookup,
51        HttpRequestExecutor $httpRequestExecutor,
52        int $maxBytes,
53        ?LoggerInterface $logger = null
54    ) {
55        $this->httpApiLookup = $httpApiLookup;
56        $this->httpRequestExecutor = $httpRequestExecutor;
57        $this->maxBytes = $maxBytes;
58        $this->logger = $logger ?? new NullLogger();
59
60        $config = MediaWikiServices::getInstance()->getMainConfig();
61
62        $this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
63        $this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
64        $this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );
65        if ( !MediaWikiServices::getInstance()->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
66            throw new ConfigException(
67                'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
68                $this->suppressedUsername . '"'
69            );
70        }
71    }
72
73    /**
74     * @return array[]
75     * @throws ImportException when the request failed
76     */
77    private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ) {
78        $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
79
80        try {
81            $imageInfoRequest = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
82        } catch ( HttpRequestException $e ) {
83            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
84                $apiUrl ], $e );
85        }
86        $requestData = json_decode( $imageInfoRequest->getContent(), true );
87        return $requestData;
88    }
89
90    /**
91     * @throws ImportException e.g. when the file couldn't be found
92     */
93    public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
94        $params = $this->getBaseParams( $sourceUrl );
95        $params = $this->addFileRevisionsToParams( $params );
96        $params = $this->addTextRevisionsToParams( $params );
97        $params = $this->addTemplatesToParams( $params );
98        $params = $this->addCategoriesToParams( $params );
99
100        $requestData = $this->sendApiRequest( $sourceUrl, $params );
101
102        if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) {
103            $this->logger->warning(
104                'No pages returned by the API',
105                [
106                    'sourceUrl' => $sourceUrl->getUrl(),
107                    'apiParameters' => $params,
108                ]
109            );
110            throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
111        }
112
113        /** @var array $pageInfoData */
114        $pageInfoData = end( $requestData['query']['pages'] );
115        '@phan-var array $pageInfoData';
116
117        if ( ( $pageInfoData['missing'] ?? false ) !== false ) {
118            if ( ( $pageInfoData['imagerepository'] ?? null ) === 'shared' ) {
119                throw new LocalizedImportException(
120                    [ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
121                );
122            }
123            throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
124        }
125
126        if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
127            $this->logger->warning(
128                'Bad image or revision info returned by the API',
129                [
130                    'sourceUrl' => $sourceUrl->getUrl(),
131                    'apiParameters' => $params,
132                ]
133            );
134            throw new LocalizedImportException( 'fileimporter-api-badinfo' );
135        }
136
137        // FIXME: Isn't this misplaced here, *before* more revisions are fetched?
138        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
139        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
140
141        while ( array_key_exists( 'continue', $requestData ) ) {
142            $this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
143        }
144
145        $pageTitle = $pageInfoData['title'];
146        $pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;
147
148        // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
149        $imageInfoData = $pageInfoData['imageinfo'];
150        $revisionsData = $pageInfoData['revisions'];
151        $fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
152        $textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
153        $templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
154        $categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );
155
156        $splitTitle = explode( ':', $pageInfoData['title'] );
157        $titleAfterColon = end( $splitTitle );
158
159        $importDetails = new ImportDetails(
160            $sourceUrl,
161            new TitleValue( NS_FILE, $titleAfterColon ),
162            $textRevisions,
163            $fileRevisions
164        );
165        // FIXME: Better use constructor parameters instead of setters?
166        $importDetails->setPageLanguage( $pageLanguage );
167        $importDetails->setTemplates( $templates );
168        $importDetails->setCategories( $categories );
169
170        return $importDetails;
171    }
172
173    /**
174     * @param array[] $results Result set as returned by the API
175     * @param int $namespace
176     *
177     * @return string[]
178     */
179    private function reduceTitleList( array $results, int $namespace ): array {
180        $titles = [];
181        foreach ( $results as $result ) {
182            if ( $result['ns'] === $namespace ) {
183                $titles[] = $result['title'];
184            }
185        }
186        return $titles;
187    }
188
189    /**
190     * Fetches the next set of revisions unless the number of revisions
191     * exceeds the max revisions limit
192     *
193     * @param SourceUrl $sourceUrl
194     * @param array[] &$requestData
195     * @param array[] &$pageInfoData
196     *
197     * @throws ImportException
198     */
199    private function getMoreRevisions(
200        SourceUrl $sourceUrl,
201        array &$requestData,
202        array &$pageInfoData
203    ): void {
204        $rvContinue = $requestData['continue']['rvcontinue'] ?? null;
205        $iiStart = $requestData['continue']['iistart'] ?? null;
206        $tlContinue = $requestData['continue']['tlcontinue'] ?? null;
207        $clContinue = $requestData['continue']['clcontinue'] ?? null;
208
209        $params = $this->getBaseParams( $sourceUrl );
210
211        if ( $iiStart ) {
212            $params = $this->addFileRevisionsToParams( $params, $iiStart );
213        }
214
215        if ( $rvContinue ) {
216            $params = $this->addTextRevisionsToParams( $params, $rvContinue );
217        }
218
219        if ( $tlContinue ) {
220            $params = $this->addTemplatesToParams( $params, $tlContinue );
221        }
222
223        if ( $clContinue ) {
224            $params = $this->addCategoriesToParams( $params, $clContinue );
225        }
226
227        $requestData = $this->sendApiRequest( $sourceUrl, $params );
228
229        $newPageInfoData = end( $requestData['query']['pages'] );
230
231        if ( array_key_exists( 'revisions', $newPageInfoData ) ) {
232            $pageInfoData['revisions'] =
233                array_merge( $pageInfoData['revisions'], $newPageInfoData['revisions'] );
234        }
235
236        if ( array_key_exists( 'imageinfo', $newPageInfoData ) ) {
237            $pageInfoData['imageinfo'] =
238                array_merge( $pageInfoData['imageinfo'], $newPageInfoData['imageinfo'] );
239        }
240
241        if ( array_key_exists( 'templates', $newPageInfoData ) ) {
242            $pageInfoData['templates'] =
243                array_merge( $pageInfoData['templates'], $newPageInfoData['templates'] );
244        }
245
246        if ( array_key_exists( 'categories', $newPageInfoData ) ) {
247            $pageInfoData['categories'] =
248                array_merge( $pageInfoData['categories'], $newPageInfoData['categories'] );
249        }
250
251        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
252        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
253    }
254
255    /**
256     * Throws an exception if the number of revisions to be imported exceeds
257     * the maximum revision limit
258     *
259     * @param SourceUrl $sourceUrl
260     * @param array[] $pageInfoData
261     *
262     * @throws ImportException when exceeding the acceptable maximum
263     */
264    private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void {
265        if ( count( $pageInfoData['revisions'] ) > $this->maxRevisions ||
266            count( $pageInfoData['imageinfo'] ) > $this->maxRevisions ||
267            count( $pageInfoData['revisions'] ) > static::MAX_REVISIONS ||
268            count( $pageInfoData['imageinfo'] ) > static::MAX_REVISIONS ) {
269            $this->logger->warning(
270                'Too many revisions were being fetched',
271                [
272                    'sourceUrl' => $sourceUrl->getUrl(),
273                ]
274            );
275
276            throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
277        }
278    }
279
280    /**
281     * @param array[] $pageInfoData
282     * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
283     *
284     * @throws ImportException when exceeding the maximum file size
285     */
286    private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void {
287        $aggregatedFileBytes = 0;
288        foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
289            $aggregatedFileBytes += $fileVersion['size'] ?? 0;
290            if ( $aggregatedFileBytes > $this->maxAggregatedBytes ||
291                $aggregatedFileBytes > static::MAX_AGGREGATED_BYTES ) {
292                $versions = count( $pageInfoData['imageinfo'] );
293                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
294            }
295        }
296    }
297
298    /**
299     * @param array[] $imageInfo
300     * @param string $pageTitle
301     *
302     * @throws ImportException when the file is not acceptable, e.g. hidden or to big
303     */
304    private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions {
305        $revisions = [];
306        foreach ( $imageInfo as $revisionInfo ) {
307            if ( ( $revisionInfo['filehidden'] ?? false ) !== false ) {
308                throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
309            }
310
311            if ( ( $revisionInfo['filemissing'] ?? false ) !== false ) {
312                throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
313            }
314
315            if ( ( $revisionInfo['userhidden'] ?? false ) !== false ) {
316                $revisionInfo['user'] ??= $this->suppressedUsername;
317            }
318
319            if ( ( $revisionInfo['size'] ?? 0 ) > $this->maxBytes ) {
320                $versions = count( $imageInfo );
321                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
322            }
323
324            if ( isset( $revisionInfo['sha1'] ) ) {
325                // Convert from API sha1 format to DB sha1 format. The conversion can be se inside
326                // ApiQueryImageInfo.
327                // * API sha1 format is base 16 padded to 40 chars
328                // * DB sha1 format is base 36 padded to 31 chars
329                $revisionInfo['sha1'] = \Wikimedia\base_convert( $revisionInfo['sha1'], 16, 36, 31 );
330            }
331
332            if ( ( $revisionInfo['commenthidden'] ?? false ) !== false ) {
333                $revisionInfo['comment'] ??=
334                    wfMessage( 'fileimporter-revision-removed-comment' )->plain();
335            }
336
337            $revisionInfo['name'] = $pageTitle;
338            $revisionInfo['description'] = $revisionInfo['comment'] ?? null;
339
340            $revisions[] = new FileRevision( $revisionInfo );
341        }
342        return new FileRevisions( $revisions );
343    }
344
345    /**
346     * @param array[] $revisionsInfo
347     * @param string $pageTitle
348     */
349    private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions {
350        $revisions = [];
351        foreach ( $revisionsInfo as $revisionInfo ) {
352            if ( ( $revisionInfo['userhidden'] ?? false ) !== false ) {
353                $revisionInfo['user'] ??= $this->suppressedUsername;
354            }
355
356            if ( ( $revisionInfo['texthidden'] ?? false ) !== false ) {
357                $revisionInfo['slots'][SlotRecord::MAIN]['content'] ??=
358                    wfMessage( 'fileimporter-revision-removed-text' )->plain();
359            }
360
361            if ( ( $revisionInfo['commenthidden'] ?? false ) !== false ) {
362                $revisionInfo['comment'] ??=
363                    wfMessage( 'fileimporter-revision-removed-comment' )->plain();
364            }
365
366            // Backwards-compatible with formatversion=1 where "" means true
367            $revisionInfo['minor'] = ( $revisionInfo['minor'] ?? false ) !== false;
368            $revisionInfo['title'] = $pageTitle;
369            $revisions[] = new TextRevision( $revisionInfo );
370        }
371        return new TextRevisions( $revisions );
372    }
373
374    private function getBaseParams( SourceUrl $sourceUrl ): array {
375        return [
376            'action' => 'query',
377            'errorformat' => 'plaintext',
378            'format' => 'json',
379            'formatversion' => '2',
380            'titles' => $this->parseTitleFromSourceUrl( $sourceUrl ),
381            'prop' => 'info'
382        ];
383    }
384
385    /**
386     * Adds to params base the properties for getting Text Revisions
387     */
388    private function addTextRevisionsToParams( array $params, ?string $rvContinue = null ): array {
389        $params['prop'] .= ( $params['prop'] ) ? '|revisions' : 'revisions';
390
391        if ( $rvContinue ) {
392            $params['rvcontinue'] = $rvContinue;
393        }
394
395        return $params + [
396            'rvlimit' => static::API_RESULT_LIMIT,
397            'rvdir' => 'newer',
398            'rvslots' => SlotRecord::MAIN,
399            'rvprop' => implode(
400                '|',
401                [
402                    'flags',
403                    'timestamp',
404                    'user',
405                    'sha1',
406                    'contentmodel',
407                    'comment',
408                    'content',
409                    'tags',
410                ]
411            )
412        ];
413    }
414
415    /**
416     * Adds to params base the properties for getting File Revisions
417     */
418    private function addFileRevisionsToParams( array $params, ?string $iiStart = null ): array {
419        $params['prop'] .= ( $params['prop'] ) ? '|imageinfo' : 'imageinfo';
420
421        if ( $iiStart ) {
422            $params['iistart'] = $iiStart;
423        }
424
425        return $params + [
426            'iilimit' => static::API_RESULT_LIMIT,
427            'iiurlwidth' => 800,
428            'iiurlheight' => 400,
429            'iiprop' => implode(
430                '|',
431                [
432                    'timestamp',
433                    'user',
434                    'userid',
435                    'comment',
436                    'canonicaltitle',
437                    'url',
438                    'size',
439                    'sha1',
440                    'archivename',
441                ]
442            )
443        ];
444    }
445
446    /**
447     * Adds to params base the properties for getting Templates
448     */
449    private function addTemplatesToParams( array $params, ?string $tlContinue = null ): array {
450        $params['prop'] .= ( $params['prop'] ) ? '|templates' : 'templates';
451
452        if ( $tlContinue ) {
453            $params['tlcontinue'] = $tlContinue;
454        }
455
456        return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => static::API_RESULT_LIMIT ];
457    }
458
459    /**
460     * Adds to params base the properties for getting Categories
461     */
462    private function addCategoriesToParams( array $params, ?string $clContinue = null ): array {
463        $params['prop'] .= ( $params['prop'] ) ? '|categories' : 'categories';
464
465        if ( $clContinue ) {
466            $params['clcontinue'] = $clContinue;
467        }
468
469        return $params + [ 'cllimit' => static::API_RESULT_LIMIT ];
470    }
471
472}