Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
88.55% covered (warning)
88.55%
201 / 227
42.86% covered (danger)
42.86%
6 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiDetailRetriever
88.55% covered (warning)
88.55%
201 / 227
42.86% covered (danger)
42.86%
6 / 14
61.88
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
2
 sendApiRequest
57.14% covered (warning)
57.14%
4 / 7
0.00% covered (danger)
0.00%
0 / 1
2.31
 getImportDetails
98.21% covered (success)
98.21%
55 / 56
0.00% covered (danger)
0.00%
0 / 1
8
 reduceTitleList
81.82% covered (warning)
81.82%
9 / 11
0.00% covered (danger)
0.00%
0 / 1
1.01
 getMoreRevisions
79.31% covered (warning)
79.31%
23 / 29
0.00% covered (danger)
0.00%
0 / 1
9.72
 checkRevisionCount
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
5
 checkMaxRevisionAggregatedBytes
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 getFileRevisionsFromImageInfo
65.00% covered (warning)
65.00%
13 / 20
0.00% covered (danger)
0.00%
0 / 1
10.74
 getTextRevisionsFromRevisionsInfo
64.29% covered (warning)
64.29%
9 / 14
0.00% covered (danger)
0.00%
0 / 1
6.14
 getBaseParams
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 addTextRevisionsToParams
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
3
 addFileRevisionsToParams
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
3
 addTemplatesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 addCategoriesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use FileImporter\Data\FileRevision;
6use FileImporter\Data\FileRevisions;
7use FileImporter\Data\ImportDetails;
8use FileImporter\Data\SourceUrl;
9use FileImporter\Data\TextRevision;
10use FileImporter\Data\TextRevisions;
11use FileImporter\Exceptions\HttpRequestException;
12use FileImporter\Exceptions\ImportException;
13use FileImporter\Exceptions\LocalizedImportException;
14use FileImporter\Interfaces\DetailRetriever;
15use FileImporter\Services\Http\HttpRequestExecutor;
16use MediaWiki\Config\ConfigException;
17use MediaWiki\MediaWikiServices;
18use MediaWiki\Revision\SlotRecord;
19use MediaWiki\Title\TitleValue;
20use Psr\Log\LoggerInterface;
21use Psr\Log\NullLogger;
22
23/**
24 * @license GPL-2.0-or-later
25 * @author Addshore
26 */
27class ApiDetailRetriever implements DetailRetriever {
28    use MediaWikiSourceUrlParser;
29
30    /** @var HttpApiLookup */
31    private $httpApiLookup;
32    /** @var HttpRequestExecutor */
33    private $httpRequestExecutor;
34    /** @var int */
35    private $maxBytes;
36    /** @var LoggerInterface */
37    private $logger;
38    /**
39     * @var string Placeholder name replacing usernames that have been suppressed as part of
40     * a steward action on the source site.
41     */
42    private $suppressedUsername;
43    /** @var int */
44    private $maxRevisions;
45    /** @var int */
46    private $maxAggregatedBytes;
47
48    private const API_RESULT_LIMIT = 500;
49    private const MAX_REVISIONS = 100;
50    private const MAX_AGGREGATED_BYTES = 250000000;
51
52    /**
53     * @param HttpApiLookup $httpApiLookup
54     * @param HttpRequestExecutor $httpRequestExecutor
55     * @param int $maxBytes
56     * @param LoggerInterface|null $logger
57     *
58     * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
59     */
60    public function __construct(
61        HttpApiLookup $httpApiLookup,
62        HttpRequestExecutor $httpRequestExecutor,
63        $maxBytes,
64        LoggerInterface $logger = null
65    ) {
66        $this->httpApiLookup = $httpApiLookup;
67        $this->httpRequestExecutor = $httpRequestExecutor;
68        $this->maxBytes = $maxBytes;
69        $this->logger = $logger ?? new NullLogger();
70
71        $config = MediaWikiServices::getInstance()->getMainConfig();
72
73        $this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
74        $this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
75        $this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );
76        if ( !MediaWikiServices::getInstance()->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
77            throw new ConfigException(
78                'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
79                $this->suppressedUsername . '"'
80            );
81        }
82    }
83
84    /**
85     * @return array[]
86     * @throws ImportException when the request failed
87     */
88    private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ) {
89        $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
90
91        try {
92            $imageInfoRequest = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
93        } catch ( HttpRequestException $e ) {
94            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
95                $apiUrl ], $e );
96        }
97        $requestData = json_decode( $imageInfoRequest->getContent(), true );
98        return $requestData;
99    }
100
101    /**
102     * @throws ImportException e.g. when the file couldn't be found
103     */
104    public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
105        $params = $this->getBaseParams( $sourceUrl );
106        $params = $this->addFileRevisionsToParams( $params );
107        $params = $this->addTextRevisionsToParams( $params );
108        $params = $this->addTemplatesToParams( $params );
109        $params = $this->addCategoriesToParams( $params );
110
111        $requestData = $this->sendApiRequest( $sourceUrl, $params );
112
113        if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) {
114            $this->logger->warning(
115                'No pages returned by the API',
116                [
117                    'sourceUrl' => $sourceUrl->getUrl(),
118                    'apiParameters' => $params,
119                ]
120            );
121            throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
122        }
123
124        /** @var array $pageInfoData */
125        $pageInfoData = end( $requestData['query']['pages'] );
126        '@phan-var array $pageInfoData';
127
128        if ( array_key_exists( 'missing', $pageInfoData ) ) {
129            if (
130                array_key_exists( 'imagerepository', $pageInfoData ) &&
131                $pageInfoData['imagerepository'] == 'shared'
132            ) {
133                throw new LocalizedImportException(
134                    [ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
135                );
136            }
137            throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
138        }
139
140        if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
141            $this->logger->warning(
142                'Bad image or revision info returned by the API',
143                [
144                    'sourceUrl' => $sourceUrl->getUrl(),
145                    'apiParameters' => $params,
146                ]
147            );
148            throw new LocalizedImportException( 'fileimporter-api-badinfo' );
149        }
150
151        // FIXME: Isn't this misplaced here, *before* more revisions are fetched?
152        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
153        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
154
155        while ( array_key_exists( 'continue', $requestData ) ) {
156            $this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
157        }
158
159        $pageTitle = $pageInfoData['title'];
160        $pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;
161
162        // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
163        $imageInfoData = $pageInfoData['imageinfo'];
164        $revisionsData = $pageInfoData['revisions'];
165        $fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
166        $textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
167        $templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
168        $categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );
169
170        $splitTitle = explode( ':', $pageInfoData['title'] );
171        $titleAfterColon = end( $splitTitle );
172
173        $importDetails = new ImportDetails(
174            $sourceUrl,
175            new TitleValue( NS_FILE, $titleAfterColon ),
176            $textRevisions,
177            $fileRevisions
178        );
179        // FIXME: Better use constructor parameters instead of setters?
180        $importDetails->setPageLanguage( $pageLanguage );
181        $importDetails->setTemplates( $templates );
182        $importDetails->setCategories( $categories );
183
184        return $importDetails;
185    }
186
187    /**
188     * @param array[] $titles
189     * @param int $namespace
190     *
191     * @return string[]
192     */
193    private function reduceTitleList( array $titles, int $namespace ): array {
194        return array_map(
195            static function ( array $title ): string {
196                return $title['title'];
197            },
198            array_filter(
199                $titles,
200                static function ( array $title ) use ( $namespace ): bool {
201                    return $title['ns'] === $namespace;
202                }
203            )
204        );
205    }
206
207    /**
208     * Fetches the next set of revisions unless the number of revisions
209     * exceeds the max revisions limit
210     *
211     * @param SourceUrl $sourceUrl
212     * @param array[] &$requestData
213     * @param array[] &$pageInfoData
214     *
215     * @throws ImportException
216     */
217    private function getMoreRevisions(
218        SourceUrl $sourceUrl,
219        array &$requestData,
220        array &$pageInfoData
221    ): void {
222        $rvContinue = $requestData['continue']['rvcontinue'] ?? null;
223        $iiStart = $requestData['continue']['iistart'] ?? null;
224        $tlContinue = $requestData['continue']['tlcontinue'] ?? null;
225        $clContinue = $requestData['continue']['clcontinue'] ?? null;
226
227        $params = $this->getBaseParams( $sourceUrl );
228
229        if ( $iiStart ) {
230            $params = $this->addFileRevisionsToParams( $params, $iiStart );
231        }
232
233        if ( $rvContinue ) {
234            $params = $this->addTextRevisionsToParams( $params, $rvContinue );
235        }
236
237        if ( $tlContinue ) {
238            $params = $this->addTemplatesToParams( $params, $tlContinue );
239        }
240
241        if ( $clContinue ) {
242            $params = $this->addCategoriesToParams( $params, $clContinue );
243        }
244
245        $requestData = $this->sendApiRequest( $sourceUrl, $params );
246
247        $newPageInfoData = end( $requestData['query']['pages'] );
248
249        if ( array_key_exists( 'revisions', $newPageInfoData ) ) {
250            $pageInfoData['revisions'] =
251                array_merge( $pageInfoData['revisions'], $newPageInfoData['revisions'] );
252        }
253
254        if ( array_key_exists( 'imageinfo', $newPageInfoData ) ) {
255            $pageInfoData['imageinfo'] =
256                array_merge( $pageInfoData['imageinfo'], $newPageInfoData['imageinfo'] );
257        }
258
259        if ( array_key_exists( 'templates', $newPageInfoData ) ) {
260            $pageInfoData['templates'] =
261                array_merge( $pageInfoData['templates'], $newPageInfoData['templates'] );
262        }
263
264        if ( array_key_exists( 'categories', $newPageInfoData ) ) {
265            $pageInfoData['categories'] =
266                array_merge( $pageInfoData['categories'], $newPageInfoData['categories'] );
267        }
268
269        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
270        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
271    }
272
273    /**
274     * Throws an exception if the number of revisions to be imported exceeds
275     * the maximum revision limit
276     *
277     * @param SourceUrl $sourceUrl
278     * @param array[] $pageInfoData
279     *
280     * @throws ImportException when exceeding the acceptable maximum
281     */
282    private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void {
283        if ( count( $pageInfoData['revisions'] ) > $this->maxRevisions ||
284            count( $pageInfoData['imageinfo'] ) > $this->maxRevisions ||
285            count( $pageInfoData['revisions'] ) > static::MAX_REVISIONS ||
286            count( $pageInfoData['imageinfo'] ) > static::MAX_REVISIONS ) {
287            $this->logger->warning(
288                'Too many revisions were being fetched',
289                [
290                    'sourceUrl' => $sourceUrl->getUrl(),
291                ]
292            );
293
294            throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
295        }
296    }
297
298    /**
299     * @param array[] $pageInfoData
300     * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
301     *
302     * @throws ImportException when exceeding the maximum file size
303     */
304    private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void {
305        $aggregatedFileBytes = 0;
306        foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
307            $aggregatedFileBytes += $fileVersion['size'] ?? 0;
308            if ( $aggregatedFileBytes > $this->maxAggregatedBytes ||
309                $aggregatedFileBytes > static::MAX_AGGREGATED_BYTES ) {
310                $versions = count( $pageInfoData['imageinfo'] );
311                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
312            }
313        }
314    }
315
316    /**
317     * @param array[] $imageInfo
318     * @param string $pageTitle
319     *
320     * @throws ImportException when the file is not acceptable, e.g. hidden or to big
321     */
322    private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions {
323        $revisions = [];
324        foreach ( $imageInfo as $revisionInfo ) {
325            if ( array_key_exists( 'filehidden', $revisionInfo ) ) {
326                throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
327            }
328
329            if ( array_key_exists( 'filemissing', $revisionInfo ) ) {
330                throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
331            }
332
333            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
334                $revisionInfo['user'] ??= $this->suppressedUsername;
335            }
336
337            if ( ( $revisionInfo['size'] ?? 0 ) > $this->maxBytes ) {
338                $versions = count( $imageInfo );
339                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
340            }
341
342            if ( isset( $revisionInfo['sha1'] ) ) {
343                // Convert from API sha1 format to DB sha1 format. The conversion can be se inside
344                // ApiQueryImageInfo.
345                // * API sha1 format is base 16 padded to 40 chars
346                // * DB sha1 format is base 36 padded to 31 chars
347                $revisionInfo['sha1'] = \Wikimedia\base_convert( $revisionInfo['sha1'], 16, 36, 31 );
348            }
349
350            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
351                $revisionInfo['comment'] ??=
352                    wfMessage( 'fileimporter-revision-removed-comment' )->plain();
353            }
354
355            $revisionInfo['name'] = $pageTitle;
356            $revisionInfo['description'] = $revisionInfo['comment'] ?? null;
357
358            $revisions[] = new FileRevision( $revisionInfo );
359        }
360        return new FileRevisions( $revisions );
361    }
362
363    /**
364     * @param array[] $revisionsInfo
365     * @param string $pageTitle
366     */
367    private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions {
368        $revisions = [];
369        foreach ( $revisionsInfo as $revisionInfo ) {
370            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
371                $revisionInfo['user'] ??= $this->suppressedUsername;
372            }
373
374            if ( array_key_exists( 'texthidden', $revisionInfo ) ) {
375                $revisionInfo['slots'][SlotRecord::MAIN]['content'] ??=
376                    wfMessage( 'fileimporter-revision-removed-text' )->plain();
377            }
378
379            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
380                $revisionInfo['comment'] ??=
381                    wfMessage( 'fileimporter-revision-removed-comment' )->plain();
382            }
383
384            $revisionInfo['minor'] = array_key_exists( 'minor', $revisionInfo );
385            $revisionInfo['title'] = $pageTitle;
386            $revisions[] = new TextRevision( $revisionInfo );
387        }
388        return new TextRevisions( $revisions );
389    }
390
391    private function getBaseParams( SourceUrl $sourceUrl ): array {
392        return [
393            'action' => 'query',
394            'errorformat' => 'plaintext',
395            'format' => 'json',
396            'formatversion' => '2',
397            'titles' => $this->parseTitleFromSourceUrl( $sourceUrl ),
398            'prop' => 'info'
399        ];
400    }
401
402    /**
403     * Adds to params base the properties for getting Text Revisions
404     */
405    private function addTextRevisionsToParams( array $params, string $rvContinue = null ): array {
406        $params['prop'] .= ( $params['prop'] ) ? '|revisions' : 'revisions';
407
408        if ( $rvContinue ) {
409            $params['rvcontinue'] = $rvContinue;
410        }
411
412        return $params + [
413            'rvlimit' => static::API_RESULT_LIMIT,
414            'rvdir' => 'newer',
415            'rvslots' => SlotRecord::MAIN,
416            'rvprop' => implode(
417                '|',
418                [
419                    'flags',
420                    'timestamp',
421                    'user',
422                    'sha1',
423                    'contentmodel',
424                    'comment',
425                    'content',
426                    'tags',
427                ]
428            )
429        ];
430    }
431
432    /**
433     * Adds to params base the properties for getting File Revisions
434     */
435    private function addFileRevisionsToParams( array $params, string $iiStart = null ): array {
436        $params['prop'] .= ( $params['prop'] ) ? '|imageinfo' : 'imageinfo';
437
438        if ( $iiStart ) {
439            $params['iistart'] = $iiStart;
440        }
441
442        return $params + [
443            'iilimit' => static::API_RESULT_LIMIT,
444            'iiurlwidth' => 800,
445            'iiurlheight' => 400,
446            'iiprop' => implode(
447                '|',
448                [
449                    'timestamp',
450                    'user',
451                    'userid',
452                    'comment',
453                    'canonicaltitle',
454                    'url',
455                    'size',
456                    'sha1',
457                    'archivename',
458                ]
459            )
460        ];
461    }
462
463    /**
464     * Adds to params base the properties for getting Templates
465     */
466    private function addTemplatesToParams( array $params, string $tlContinue = null ): array {
467        $params['prop'] .= ( $params['prop'] ) ? '|templates' : 'templates';
468
469        if ( $tlContinue ) {
470            $params['tlcontinue'] = $tlContinue;
471        }
472
473        return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => static::API_RESULT_LIMIT ];
474    }
475
476    /**
477     * Adds to params base the properties for getting Categories
478     */
479    private function addCategoriesToParams( array $params, string $clContinue = null ): array {
480        $params['prop'] .= ( $params['prop'] ) ? '|categories' : 'categories';
481
482        if ( $clContinue ) {
483            $params['clcontinue'] = $clContinue;
484        }
485
486        return $params + [ 'cllimit' => static::API_RESULT_LIMIT ];
487    }
488
489}