Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
88.24% covered (warning)
88.24%
195 / 221
42.86% covered (danger)
42.86%
6 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiDetailRetriever
88.24% covered (warning)
88.24%
195 / 221
42.86% covered (danger)
42.86%
6 / 14
64.67
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
2
 sendApiRequest
57.14% covered (warning)
57.14%
4 / 7
0.00% covered (danger)
0.00%
0 / 1
2.31
 getImportDetails
98.21% covered (success)
98.21%
55 / 56
0.00% covered (danger)
0.00%
0 / 1
8
 reduceTitleList
60.00% covered (warning)
60.00%
3 / 5
0.00% covered (danger)
0.00%
0 / 1
3.58
 getMoreRevisions
79.31% covered (warning)
79.31%
23 / 29
0.00% covered (danger)
0.00%
0 / 1
9.72
 checkRevisionCount
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
5
 checkMaxRevisionAggregatedBytes
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 getFileRevisionsFromImageInfo
65.00% covered (warning)
65.00%
13 / 20
0.00% covered (danger)
0.00%
0 / 1
10.74
 getTextRevisionsFromRevisionsInfo
64.29% covered (warning)
64.29%
9 / 14
0.00% covered (danger)
0.00%
0 / 1
6.14
 getBaseParams
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 addTextRevisionsToParams
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
3
 addFileRevisionsToParams
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
3
 addTemplatesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 addCategoriesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use FileImporter\Data\FileRevision;
6use FileImporter\Data\FileRevisions;
7use FileImporter\Data\ImportDetails;
8use FileImporter\Data\SourceUrl;
9use FileImporter\Data\TextRevision;
10use FileImporter\Data\TextRevisions;
11use FileImporter\Exceptions\HttpRequestException;
12use FileImporter\Exceptions\ImportException;
13use FileImporter\Exceptions\LocalizedImportException;
14use FileImporter\Interfaces\DetailRetriever;
15use FileImporter\Services\Http\HttpRequestExecutor;
16use MediaWiki\Config\ConfigException;
17use MediaWiki\MediaWikiServices;
18use MediaWiki\Revision\SlotRecord;
19use MediaWiki\Title\TitleValue;
20use Psr\Log\LoggerInterface;
21use Psr\Log\NullLogger;
22
23/**
24 * @license GPL-2.0-or-later
25 * @author Addshore
26 */
27class ApiDetailRetriever implements DetailRetriever {
28    use MediaWikiSourceUrlParser;
29
30    private HttpApiLookup $httpApiLookup;
31    private HttpRequestExecutor $httpRequestExecutor;
32    /** @var int */
33    private $maxBytes;
34    private LoggerInterface $logger;
35    /**
36     * @var string Placeholder name replacing usernames that have been suppressed as part of
37     * a steward action on the source site.
38     */
39    private $suppressedUsername;
40    /** @var int */
41    private $maxRevisions;
42    /** @var int */
43    private $maxAggregatedBytes;
44
45    private const API_RESULT_LIMIT = 500;
46    private const MAX_REVISIONS = 100;
47    private const MAX_AGGREGATED_BYTES = 250000000;
48
/**
 * Stores the injected collaborators and reads the revision limit, the aggregated size
 * limit and the placeholder username for suppressed users from the main configuration.
 *
 * @param HttpApiLookup $httpApiLookup
 * @param HttpRequestExecutor $httpRequestExecutor
 * @param int $maxBytes Upper bound each file revision's reported size is compared against
 * @param LoggerInterface|null $logger Replaced by a NullLogger when omitted
 *
 * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
 */
public function __construct(
    HttpApiLookup $httpApiLookup,
    HttpRequestExecutor $httpRequestExecutor,
    $maxBytes,
    ?LoggerInterface $logger = null
) {
    $this->httpApiLookup = $httpApiLookup;
    $this->httpRequestExecutor = $httpRequestExecutor;
    $this->maxBytes = $maxBytes;
    $this->logger = $logger ?? new NullLogger();

    // Look the service container up once instead of once per use.
    $services = MediaWikiServices::getInstance();
    $config = $services->getMainConfig();

    $this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
    $this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
    $this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );

    // Fail early on a misconfigured placeholder instead of failing later during an import.
    if ( !$services->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
        throw new ConfigException(
            'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
            $this->suppressedUsername . '"'
        );
    }
}
80
/**
 * Sends a request to the API that belongs to the given source URL and decodes the JSON
 * response into an array.
 *
 * @param SourceUrl $sourceUrl Used to look up the URL of the API to talk to
 * @param array $apiParameters Parameters handed to the HTTP request executor
 *
 * @return array[] The decoded response
 * @throws ImportException when the request failed or the response could not be decoded
 *  into an array
 */
private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ): array {
    $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );

    try {
        $response = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
    } catch ( HttpRequestException $e ) {
        throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
            $apiUrl ], $e );
    }

    $requestData = json_decode( $response->getContent(), true );

    // json_decode() yields null for malformed input and a scalar for scalar JSON. Callers
    // index into the result and pass it to array functions, so anything but an array must
    // be reported as a failed request rather than surfacing later as a TypeError.
    if ( !is_array( $requestData ) ) {
        $this->logger->warning(
            'The API response could not be decoded into an array',
            [
                'apiUrl' => $apiUrl,
                'apiParameters' => $apiParameters,
                'jsonError' => json_last_error_msg(),
            ]
        );
        throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo', $apiUrl ] );
    }

    return $requestData;
}
97
/**
 * Queries the source wiki's API for the file revisions, text revisions, templates and
 * categories of the page behind the given URL, follows the API's continuation until
 * everything is fetched, and bundles the result into an ImportDetails object.
 *
 * @param SourceUrl $sourceUrl
 *
 * @return ImportDetails
 * @throws ImportException e.g. when the file couldn't be found
 */
public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
    $params = $this->getBaseParams( $sourceUrl );
    $params = $this->addFileRevisionsToParams( $params );
    $params = $this->addTextRevisionsToParams( $params );
    $params = $this->addTemplatesToParams( $params );
    $params = $this->addCategoriesToParams( $params );

    $requestData = $this->sendApiRequest( $sourceUrl, $params );

    // Exactly one page is expected, as exactly one title was requested.
    if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) {
        $this->logger->warning(
            'No pages returned by the API',
            [
                'sourceUrl' => $sourceUrl->getUrl(),
                'apiParameters' => $params,
            ]
        );
        throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
    }

    /** @var array $pageInfoData */
    $pageInfoData = end( $requestData['query']['pages'] );
    '@phan-var array $pageInfoData';

    if ( array_key_exists( 'missing', $pageInfoData ) ) {
        // A missing page whose file lives in a shared repository gets a dedicated message.
        if (
            array_key_exists( 'imagerepository', $pageInfoData ) &&
            $pageInfoData['imagerepository'] === 'shared'
        ) {
            throw new LocalizedImportException(
                [ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
            );
        }
        throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
    }

    if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
        $this->logger->warning(
            'Bad image or revision info returned by the API',
            [
                'sourceUrl' => $sourceUrl->getUrl(),
                'apiParameters' => $params,
            ]
        );
        throw new LocalizedImportException( 'fileimporter-api-badinfo' );
    }

    // FIXME: Isn't this misplaced here, *before* more revisions are fetched?
    $this->checkRevisionCount( $sourceUrl, $pageInfoData );
    $this->checkMaxRevisionAggregatedBytes( $pageInfoData );

    // getMoreRevisions() replaces $requestData with the follow-up response and merges the
    // newly fetched entries into $pageInfoData, so this ends once no "continue" is left.
    while ( array_key_exists( 'continue', $requestData ) ) {
        $this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
    }

    $pageTitle = $pageInfoData['title'];
    $pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;

    // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
    $imageInfoData = $pageInfoData['imageinfo'];
    $revisionsData = $pageInfoData['revisions'];
    $fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
    $textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
    $templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
    $categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );

    // Drop the namespace prefix only. Splitting at the first colon (limit 2) keeps any
    // further colons that are part of the title itself, instead of truncating the title
    // to whatever follows its last colon.
    $splitTitle = explode( ':', $pageTitle, 2 );
    $titleAfterColon = end( $splitTitle );

    $importDetails = new ImportDetails(
        $sourceUrl,
        new TitleValue( NS_FILE, $titleAfterColon ),
        $textRevisions,
        $fileRevisions
    );
    // FIXME: Better use constructor parameters instead of setters?
    $importDetails->setPageLanguage( $pageLanguage );
    $importDetails->setTemplates( $templates );
    $importDetails->setCategories( $categories );

    return $importDetails;
}
183
/**
 * Picks the titles of all entries that belong to the given namespace.
 *
 * @param array[] $results Result set as returned by the API, each entry carrying an "ns"
 *  and a "title" element
 * @param int $namespace Namespace number an entry must match exactly to be kept
 *
 * @return string[] Titles in the order they appeared in the result set
 */
private function reduceTitleList( array $results, int $namespace ): array {
    $matchingTitles = [];

    foreach ( $results as $entry ) {
        if ( $entry['ns'] !== $namespace ) {
            continue;
        }

        $matchingTitles[] = $entry['title'];
    }

    return $matchingTitles;
}
199
/**
 * Fetches the next batch of results announced by the "continue" element of the previous
 * response and merges it into the page info collected so far. Fails when the merged
 * result exceeds the revision or aggregated size limits.
 *
 * @param SourceUrl $sourceUrl
 * @param array[] &$requestData Previous API response; replaced with the new response
 * @param array[] &$pageInfoData Page info collected so far; extended with the new entries
 *
 * @throws ImportException when the request fails, the response contains no page, or a
 *  limit is exceeded
 */
private function getMoreRevisions(
    SourceUrl $sourceUrl,
    array &$requestData,
    array &$pageInfoData
): void {
    $rvContinue = $requestData['continue']['rvcontinue'] ?? null;
    $iiStart = $requestData['continue']['iistart'] ?? null;
    $tlContinue = $requestData['continue']['tlcontinue'] ?? null;
    $clContinue = $requestData['continue']['clcontinue'] ?? null;

    // Only re-request the properties the API reported a continuation value for.
    $params = $this->getBaseParams( $sourceUrl );

    if ( $iiStart ) {
        $params = $this->addFileRevisionsToParams( $params, $iiStart );
    }

    if ( $rvContinue ) {
        $params = $this->addTextRevisionsToParams( $params, $rvContinue );
    }

    if ( $tlContinue ) {
        $params = $this->addTemplatesToParams( $params, $tlContinue );
    }

    if ( $clContinue ) {
        $params = $this->addCategoriesToParams( $params, $clContinue );
    }

    $requestData = $this->sendApiRequest( $sourceUrl, $params );

    // A follow-up response without any page must end in an ImportException, not in a
    // TypeError raised by end() or array_key_exists() on a non-array.
    $pages = $requestData['query']['pages'] ?? [];
    $newPageInfoData = is_array( $pages ) ? end( $pages ) : false;
    if ( !is_array( $newPageInfoData ) ) {
        $this->logger->warning(
            'No pages returned by the API',
            [
                'sourceUrl' => $sourceUrl->getUrl(),
                'apiParameters' => $params,
            ]
        );
        throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
    }

    // The earlier responses are not guaranteed to contain every list (e.g. "templates"
    // or "categories" may only show up in a later batch), hence the empty-array default.
    foreach ( [ 'revisions', 'imageinfo', 'templates', 'categories' ] as $key ) {
        if ( array_key_exists( $key, $newPageInfoData ) ) {
            $pageInfoData[$key] =
                array_merge( $pageInfoData[$key] ?? [], $newPageInfoData[$key] );
        }
    }

    $this->checkRevisionCount( $sourceUrl, $pageInfoData );
    $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
}
265
/**
 * Makes sure neither the number of text revisions nor the number of file revisions
 * exceeds the configured maximum or the hard-coded MAX_REVISIONS, whichever is lower.
 *
 * @param SourceUrl $sourceUrl Only used for logging
 * @param array[] $pageInfoData Must contain the "revisions" and "imageinfo" lists
 *
 * @throws ImportException when exceeding the acceptable maximum
 */
private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void {
    // Exceeding either limit is the same as exceeding the smaller one.
    $limit = min( $this->maxRevisions, static::MAX_REVISIONS );

    $tooMany = count( $pageInfoData['revisions'] ) > $limit
        || count( $pageInfoData['imageinfo'] ) > $limit;
    if ( !$tooMany ) {
        return;
    }

    $this->logger->warning(
        'Too many revisions were being fetched',
        [
            'sourceUrl' => $sourceUrl->getUrl(),
        ]
    );

    throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
}
290
/**
 * Adds up the reported sizes of all file revisions and fails as soon as the running total
 * exceeds the configured maximum or the hard-coded MAX_AGGREGATED_BYTES, whichever is
 * lower. Revisions without a reported size count as 0.
 *
 * @param array[] $pageInfoData
 * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
 *
 * @throws ImportException when exceeding the maximum file size
 */
private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void {
    $fileVersions = $pageInfoData['imageinfo'];
    $limit = min( $this->maxAggregatedBytes, static::MAX_AGGREGATED_BYTES );

    $runningTotal = 0;
    foreach ( $fileVersions as $fileVersion ) {
        $runningTotal += $fileVersion['size'] ?? 0;
        if ( $runningTotal <= $limit ) {
            continue;
        }

        // The message is parameterized with the total number of file versions.
        throw new LocalizedImportException(
            [ 'fileimporter-filetoolarge', count( $fileVersions ) ]
        );
    }
}
308
/**
 * Converts the "imageinfo" entries of an API response into FileRevision objects.
 *
 * @param array[] $imageInfo
 * @param string $pageTitle Stored as the "name" of every revision
 *
 * @return FileRevisions
 * @throws ImportException when the file is not acceptable, e.g. hidden or too big
 */
private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions {
    $fileRevisions = [];

    foreach ( $imageInfo as $info ) {
        // Hidden or missing files cannot be imported at all.
        if ( array_key_exists( 'filehidden', $info ) ) {
            throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
        }
        if ( array_key_exists( 'filemissing', $info ) ) {
            throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
        }

        // A hidden user is replaced by the configured placeholder, unless a name is given.
        if ( array_key_exists( 'userhidden', $info ) ) {
            $info['user'] ??= $this->suppressedUsername;
        }

        $size = $info['size'] ?? 0;
        if ( $size > $this->maxBytes ) {
            throw new LocalizedImportException(
                [ 'fileimporter-filetoolarge', count( $imageInfo ) ]
            );
        }

        if ( isset( $info['sha1'] ) ) {
            // Convert from API sha1 format to DB sha1 format. The conversion can be seen
            // inside ApiQueryImageInfo.
            // * API sha1 format is base 16 padded to 40 chars
            // * DB sha1 format is base 36 padded to 31 chars
            $info['sha1'] = \Wikimedia\base_convert( $info['sha1'], 16, 36, 31 );
        }

        // A hidden comment is replaced by a localized placeholder, unless one is given.
        if ( array_key_exists( 'commenthidden', $info ) ) {
            $info['comment'] ??= wfMessage( 'fileimporter-revision-removed-comment' )->plain();
        }

        $info['name'] = $pageTitle;
        $info['description'] = $info['comment'] ?? null;

        $fileRevisions[] = new FileRevision( $info );
    }

    return new FileRevisions( $fileRevisions );
}
355
/**
 * Converts the "revisions" entries of an API response into TextRevision objects. Hidden
 * users, texts and comments are filled with placeholders unless a value is present.
 *
 * @param array[] $revisionsInfo
 * @param string $pageTitle Stored as the "title" of every revision
 *
 * @return TextRevisions
 */
private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions {
    $toTextRevision = function ( $info ) use ( $pageTitle ) {
        if ( array_key_exists( 'userhidden', $info ) ) {
            $info['user'] ??= $this->suppressedUsername;
        }

        if ( array_key_exists( 'texthidden', $info ) ) {
            $info['slots'][SlotRecord::MAIN]['content'] ??=
                wfMessage( 'fileimporter-revision-removed-text' )->plain();
        }

        if ( array_key_exists( 'commenthidden', $info ) ) {
            $info['comment'] ??=
                wfMessage( 'fileimporter-revision-removed-comment' )->plain();
        }

        // The mere presence of the "minor" key is what marks a minor edit.
        $info['minor'] = array_key_exists( 'minor', $info );
        $info['title'] = $pageTitle;

        return new TextRevision( $info );
    };

    // array_values() keeps the result a plain list, whatever keys the input had.
    return new TextRevisions( array_values( array_map( $toTextRevision, $revisionsInfo ) ) );
}
383
/**
 * Builds the parameters every API query of this class starts from: a JSON "query"
 * request (formatversion 2) for the title parsed from the source URL, asking for the
 * "info" property only.
 *
 * @param SourceUrl $sourceUrl
 *
 * @return array
 */
private function getBaseParams( SourceUrl $sourceUrl ): array {
    $baseParams = [
        'action' => 'query',
        'errorformat' => 'plaintext',
        'format' => 'json',
        'formatversion' => '2',
    ];
    $baseParams['titles'] = $this->parseTitleFromSourceUrl( $sourceUrl );
    $baseParams['prop'] = 'info';

    return $baseParams;
}
394
/**
 * Adds to params base the properties for getting Text Revisions
 *
 * @param array $params Parameters to extend; must contain a "prop" element. Keys that are
 *  already set take precedence over the ones added here.
 * @param string|null $rvContinue Continuation value from a previous response, if any
 *
 * @return array The extended parameters
 */
private function addTextRevisionsToParams( array $params, ?string $rvContinue = null ): array {
    $params['prop'] .= ( $params['prop'] ) ? '|revisions' : 'revisions';

    if ( $rvContinue ) {
        $params['rvcontinue'] = $rvContinue;
    }

    return $params + [
        'rvlimit' => static::API_RESULT_LIMIT,
        'rvdir' => 'newer',
        'rvslots' => SlotRecord::MAIN,
        'rvprop' => implode(
            '|',
            [
                'flags',
                'timestamp',
                'user',
                'sha1',
                'contentmodel',
                'comment',
                'content',
                'tags',
            ]
        )
    ];
}
424
/**
 * Adds to params base the properties for getting File Revisions
 *
 * @param array $params Parameters to extend; must contain a "prop" element. Keys that are
 *  already set take precedence over the ones added here.
 * @param string|null $iiStart Continuation value from a previous response, if any
 *
 * @return array The extended parameters
 */
private function addFileRevisionsToParams( array $params, ?string $iiStart = null ): array {
    $params['prop'] .= ( $params['prop'] ) ? '|imageinfo' : 'imageinfo';

    if ( $iiStart ) {
        $params['iistart'] = $iiStart;
    }

    return $params + [
        'iilimit' => static::API_RESULT_LIMIT,
        'iiurlwidth' => 800,
        'iiurlheight' => 400,
        'iiprop' => implode(
            '|',
            [
                'timestamp',
                'user',
                'userid',
                'comment',
                'canonicaltitle',
                'url',
                'size',
                'sha1',
                'archivename',
            ]
        )
    ];
}
455
/**
 * Adds to params base the properties for getting Templates
 *
 * @param array $params Parameters to extend; must contain a "prop" element. Keys that are
 *  already set take precedence over the ones added here.
 * @param string|null $tlContinue Continuation value from a previous response, if any
 *
 * @return array The extended parameters
 */
private function addTemplatesToParams( array $params, ?string $tlContinue = null ): array {
    $params['prop'] .= ( $params['prop'] ) ? '|templates' : 'templates';

    if ( $tlContinue ) {
        $params['tlcontinue'] = $tlContinue;
    }

    return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => static::API_RESULT_LIMIT ];
}
468
/**
 * Adds to params base the properties for getting Categories
 *
 * @param array $params Parameters to extend; must contain a "prop" element. Keys that are
 *  already set take precedence over the ones added here.
 * @param string|null $clContinue Continuation value from a previous response, if any
 *
 * @return array The extended parameters
 */
private function addCategoriesToParams( array $params, ?string $clContinue = null ): array {
    $params['prop'] .= ( $params['prop'] ) ? '|categories' : 'categories';

    if ( $clContinue ) {
        $params['clcontinue'] = $clContinue;
    }

    return $params + [ 'cllimit' => static::API_RESULT_LIMIT ];
}
481
482}