Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
88.24% covered (warning)
88.24%
195 / 221
42.86% covered (danger)
42.86%
6 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiDetailRetriever
88.24% covered (warning)
88.24%
195 / 221
42.86% covered (danger)
42.86%
6 / 14
64.67
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
2
 sendApiRequest
57.14% covered (warning)
57.14%
4 / 7
0.00% covered (danger)
0.00%
0 / 1
2.31
 getImportDetails
98.21% covered (success)
98.21%
55 / 56
0.00% covered (danger)
0.00%
0 / 1
8
 reduceTitleList
60.00% covered (warning)
60.00%
3 / 5
0.00% covered (danger)
0.00%
0 / 1
3.58
 getMoreRevisions
79.31% covered (warning)
79.31%
23 / 29
0.00% covered (danger)
0.00%
0 / 1
9.72
 checkRevisionCount
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
5
 checkMaxRevisionAggregatedBytes
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 getFileRevisionsFromImageInfo
65.00% covered (warning)
65.00%
13 / 20
0.00% covered (danger)
0.00%
0 / 1
10.74
 getTextRevisionsFromRevisionsInfo
64.29% covered (warning)
64.29%
9 / 14
0.00% covered (danger)
0.00%
0 / 1
6.14
 getBaseParams
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 addTextRevisionsToParams
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
3
 addFileRevisionsToParams
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
3
 addTemplatesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 addCategoriesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use FileImporter\Data\FileRevision;
6use FileImporter\Data\FileRevisions;
7use FileImporter\Data\ImportDetails;
8use FileImporter\Data\SourceUrl;
9use FileImporter\Data\TextRevision;
10use FileImporter\Data\TextRevisions;
11use FileImporter\Exceptions\HttpRequestException;
12use FileImporter\Exceptions\ImportException;
13use FileImporter\Exceptions\LocalizedImportException;
14use FileImporter\Interfaces\DetailRetriever;
15use FileImporter\Services\Http\HttpRequestExecutor;
16use MediaWiki\Config\ConfigException;
17use MediaWiki\MediaWikiServices;
18use MediaWiki\Revision\SlotRecord;
19use MediaWiki\Title\TitleValue;
20use Psr\Log\LoggerInterface;
21use Psr\Log\NullLogger;
22
/**
 * Collects the details needed to import a file page (file revisions, text revisions,
 * templates and categories) by querying the action API of the source wiki.
 *
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class ApiDetailRetriever implements DetailRetriever {
    use MediaWikiSourceUrlParser;

    private HttpApiLookup $httpApiLookup;
    private HttpRequestExecutor $httpRequestExecutor;
    /** @var int Maximum size in bytes a single file revision may have */
    private int $maxBytes;
    private LoggerInterface $logger;
    /**
     * @var string Placeholder name replacing usernames that have been suppressed as part of
     * a steward action on the source site.
     */
    private $suppressedUsername;
    /** @var int Configured maximum number of text or file revisions (FileImporterMaxRevisions) */
    private int $maxRevisions;
    /** @var int Configured maximum total size of all file revisions (FileImporterMaxAggregatedBytes) */
    private int $maxAggregatedBytes;

    /** Value sent as rvlimit, iilimit, tllimit and cllimit with every API request */
    private const API_RESULT_LIMIT = 500;
    /** Hard upper bound for the number of revisions, enforced in addition to the configured one */
    private const MAX_REVISIONS = 100;
    /** Hard upper bound for the aggregated file size, enforced in addition to the configured one */
    private const MAX_AGGREGATED_BYTES = 250000000;

    /**
     * @param HttpApiLookup $httpApiLookup Used to resolve the API URL for a source URL
     * @param HttpRequestExecutor $httpRequestExecutor Used to execute the API requests
     * @param int $maxBytes Maximum size in bytes a single file revision may have
     * @param LoggerInterface|null $logger Falls back to a NullLogger when omitted
     *
     * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
     */
    public function __construct(
        HttpApiLookup $httpApiLookup,
        HttpRequestExecutor $httpRequestExecutor,
        int $maxBytes,
        ?LoggerInterface $logger = null
    ) {
        $this->httpApiLookup = $httpApiLookup;
        $this->httpRequestExecutor = $httpRequestExecutor;
        $this->maxBytes = $maxBytes;
        $this->logger = $logger ?? new NullLogger();

        $services = MediaWikiServices::getInstance();
        $config = $services->getMainConfig();

        $this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
        $this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
        $this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );
        if ( !$services->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
            throw new ConfigException(
                'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
                $this->suppressedUsername . '"'
            );
        }
    }

    /**
     * Sends a single request to the API belonging to the given source URL and returns the
     * decoded JSON response.
     *
     * @param SourceUrl $sourceUrl
     * @param array $apiParameters
     *
     * @return array[] Decoded response, or an empty array when the response body could not be
     *  decoded to an array. Callers treat a response without "query.pages" as an error.
     * @throws ImportException when the request failed
     */
    private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ): array {
        $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );

        try {
            $imageInfoRequest = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
        } catch ( HttpRequestException $e ) {
            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
                $apiUrl ], $e );
        }

        // json_decode() yields null (or a scalar) for malformed or unexpected bodies. Normalize
        // that to an empty array so callers can rely on the documented array return type.
        $requestData = json_decode( $imageInfoRequest->getContent(), true );
        return is_array( $requestData ) ? $requestData : [];
    }

    /**
     * Fetches all file revisions, text revisions, templates and categories of the page the
     * source URL points to, following API continuation until everything was retrieved.
     *
     * @param SourceUrl $sourceUrl
     *
     * @return ImportDetails
     * @throws ImportException e.g. when the file couldn't be found
     */
    public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
        $params = $this->getBaseParams( $sourceUrl );
        $params = $this->addFileRevisionsToParams( $params );
        $params = $this->addTextRevisionsToParams( $params );
        $params = $this->addTemplatesToParams( $params );
        $params = $this->addCategoriesToParams( $params );

        $requestData = $this->sendApiRequest( $sourceUrl, $params );

        // Exactly one page is expected, anything else is treated as an error
        if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) {
            $this->logger->warning(
                'No pages returned by the API',
                [
                    'sourceUrl' => $sourceUrl->getUrl(),
                    'apiParameters' => $params,
                ]
            );
            throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
        }

        /** @var array $pageInfoData */
        $pageInfoData = end( $requestData['query']['pages'] );
        '@phan-var array $pageInfoData';

        if ( array_key_exists( 'missing', $pageInfoData ) ) {
            // A missing page that reports a "shared" image repository gets a dedicated message
            if ( ( $pageInfoData['imagerepository'] ?? null ) === 'shared' ) {
                throw new LocalizedImportException(
                    [ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
                );
            }
            throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
        }

        if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
            $this->logger->warning(
                'Bad image or revision info returned by the API',
                [
                    'sourceUrl' => $sourceUrl->getUrl(),
                    'apiParameters' => $params,
                ]
            );
            throw new LocalizedImportException( 'fileimporter-api-badinfo' );
        }

        // FIXME: Isn't this misplaced here, *before* more revisions are fetched?
        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );

        // getMoreRevisions() replaces $requestData with the next response and appends the new
        // results to $pageInfoData, so this loop ends once a response carries no "continue".
        while ( array_key_exists( 'continue', $requestData ) ) {
            $this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
        }

        $pageTitle = $pageInfoData['title'];
        $pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;

        // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
        $imageInfoData = $pageInfoData['imageinfo'];
        $revisionsData = $pageInfoData['revisions'];
        $fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
        $textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
        $templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
        $categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );

        // NOTE(review): this keeps only the text after the *last* colon, so a title that
        // contains further colons after the namespace prefix is truncated. Confirm intended.
        $splitTitle = explode( ':', $pageTitle );
        $titleAfterColon = end( $splitTitle );

        $importDetails = new ImportDetails(
            $sourceUrl,
            new TitleValue( NS_FILE, $titleAfterColon ),
            $textRevisions,
            $fileRevisions
        );
        // FIXME: Better use constructor parameters instead of setters?
        $importDetails->setPageLanguage( $pageLanguage );
        $importDetails->setTemplates( $templates );
        $importDetails->setCategories( $categories );

        return $importDetails;
    }

    /**
     * Extracts the titles of all results that are in the given namespace.
     *
     * @param array[] $results Result set as returned by the API
     * @param int $namespace
     *
     * @return string[]
     */
    private function reduceTitleList( array $results, int $namespace ): array {
        $titles = [];
        foreach ( $results as $result ) {
            if ( $result['ns'] === $namespace ) {
                $titles[] = $result['title'];
            }
        }
        return $titles;
    }

    /**
     * Fetches the next set of revisions unless the number of revisions
     * exceeds the max revisions limit
     *
     * Only the properties that have a continuation value in the previous response are
     * requested again. The new results are appended to $pageInfoData.
     *
     * @param SourceUrl $sourceUrl
     * @param array[] &$requestData Previous API response, replaced with the new one
     * @param array[] &$pageInfoData Accumulated page info, extended with the new results
     *
     * @throws ImportException when the request failed, the response contains no page, or one
     *  of the limits is exceeded
     */
    private function getMoreRevisions(
        SourceUrl $sourceUrl,
        array &$requestData,
        array &$pageInfoData
    ): void {
        $rvContinue = $requestData['continue']['rvcontinue'] ?? null;
        $iiStart = $requestData['continue']['iistart'] ?? null;
        $tlContinue = $requestData['continue']['tlcontinue'] ?? null;
        $clContinue = $requestData['continue']['clcontinue'] ?? null;

        $params = $this->getBaseParams( $sourceUrl );

        if ( $iiStart ) {
            $params = $this->addFileRevisionsToParams( $params, $iiStart );
        }

        if ( $rvContinue ) {
            $params = $this->addTextRevisionsToParams( $params, $rvContinue );
        }

        if ( $tlContinue ) {
            $params = $this->addTemplatesToParams( $params, $tlContinue );
        }

        if ( $clContinue ) {
            $params = $this->addCategoriesToParams( $params, $clContinue );
        }

        $requestData = $this->sendApiRequest( $sourceUrl, $params );

        // A continuation response without page data (e.g. an API error or malformed JSON) must
        // end in an ImportException like the first request does, not in a TypeError.
        $pages = $requestData['query']['pages'] ?? [];
        $newPageInfoData = is_array( $pages ) ? end( $pages ) : false;
        if ( !is_array( $newPageInfoData ) ) {
            $this->logger->warning(
                'No pages returned by the API',
                [
                    'sourceUrl' => $sourceUrl->getUrl(),
                    'apiParameters' => $params,
                ]
            );
            throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
        }

        // Templates and categories are optional in the accumulated data, hence the fallback
        foreach ( [ 'revisions', 'imageinfo', 'templates', 'categories' ] as $key ) {
            if ( array_key_exists( $key, $newPageInfoData ) ) {
                $pageInfoData[$key] =
                    array_merge( $pageInfoData[$key] ?? [], $newPageInfoData[$key] );
            }
        }

        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
    }

    /**
     * Throws an exception if the number of revisions to be imported exceeds
     * the maximum revision limit
     *
     * Both the configured limit and the hard-coded MAX_REVISIONS apply, i.e. the lower one wins.
     *
     * @param SourceUrl $sourceUrl
     * @param array[] $pageInfoData
     *
     * @throws ImportException when exceeding the acceptable maximum
     */
    private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void {
        $limit = min( $this->maxRevisions, self::MAX_REVISIONS );

        if ( count( $pageInfoData['revisions'] ) > $limit ||
            count( $pageInfoData['imageinfo'] ) > $limit
        ) {
            $this->logger->warning(
                'Too many revisions were being fetched',
                [
                    'sourceUrl' => $sourceUrl->getUrl(),
                ]
            );

            throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
        }
    }

    /**
     * Throws an exception if the summed up size of all file revisions exceeds the configured
     * limit or the hard-coded MAX_AGGREGATED_BYTES, whichever is lower. Revisions without a
     * "size" count as 0 bytes.
     *
     * @param array[] $pageInfoData
     * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
     *
     * @throws ImportException when exceeding the maximum file size
     */
    private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void {
        $limit = min( $this->maxAggregatedBytes, self::MAX_AGGREGATED_BYTES );

        $aggregatedFileBytes = 0;
        foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
            $aggregatedFileBytes += $fileVersion['size'] ?? 0;
            if ( $aggregatedFileBytes > $limit ) {
                $versions = count( $pageInfoData['imageinfo'] );
                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
            }
        }
    }

    /**
     * Converts the "imageinfo" entries returned by the API to FileRevision objects.
     *
     * @param array[] $imageInfo
     * @param string $pageTitle
     *
     * @return FileRevisions
     * @throws ImportException when the file is not acceptable, e.g. hidden or too big
     */
    private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions {
        $revisions = [];
        foreach ( $imageInfo as $revisionInfo ) {
            if ( array_key_exists( 'filehidden', $revisionInfo ) ) {
                throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
            }

            if ( array_key_exists( 'filemissing', $revisionInfo ) ) {
                throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
            }

            // Use the placeholder account only when no username was returned
            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
                $revisionInfo['user'] ??= $this->suppressedUsername;
            }

            if ( ( $revisionInfo['size'] ?? 0 ) > $this->maxBytes ) {
                $versions = count( $imageInfo );
                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
            }

            if ( isset( $revisionInfo['sha1'] ) ) {
                // Convert from API sha1 format to DB sha1 format. The conversion can be seen
                // inside ApiQueryImageInfo.
                // * API sha1 format is base 16 padded to 40 chars
                // * DB sha1 format is base 36 padded to 31 chars
                $revisionInfo['sha1'] = \Wikimedia\base_convert( $revisionInfo['sha1'], 16, 36, 31 );
            }

            // Use a placeholder comment only when no comment was returned
            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
                $revisionInfo['comment'] ??=
                    wfMessage( 'fileimporter-revision-removed-comment' )->plain();
            }

            $revisionInfo['name'] = $pageTitle;
            $revisionInfo['description'] = $revisionInfo['comment'] ?? null;

            $revisions[] = new FileRevision( $revisionInfo );
        }
        return new FileRevisions( $revisions );
    }

    /**
     * Converts the "revisions" entries returned by the API to TextRevision objects, filling in
     * placeholders for hidden users, texts and comments when the API returned no value.
     *
     * @param array[] $revisionsInfo
     * @param string $pageTitle
     *
     * @return TextRevisions
     */
    private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions {
        $revisions = [];
        foreach ( $revisionsInfo as $revisionInfo ) {
            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
                $revisionInfo['user'] ??= $this->suppressedUsername;
            }

            if ( array_key_exists( 'texthidden', $revisionInfo ) ) {
                $revisionInfo['slots'][SlotRecord::MAIN]['content'] ??=
                    wfMessage( 'fileimporter-revision-removed-text' )->plain();
            }

            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
                $revisionInfo['comment'] ??=
                    wfMessage( 'fileimporter-revision-removed-comment' )->plain();
            }

            // Only the presence of the "minor" key matters, it is normalized to a boolean
            $revisionInfo['minor'] = array_key_exists( 'minor', $revisionInfo );
            $revisionInfo['title'] = $pageTitle;
            $revisions[] = new TextRevision( $revisionInfo );
        }
        return new TextRevisions( $revisions );
    }

    /**
     * Returns the query parameters every request shares. Further properties are appended to
     * "prop" by the add…ToParams() methods.
     *
     * @param SourceUrl $sourceUrl
     *
     * @return array
     */
    private function getBaseParams( SourceUrl $sourceUrl ): array {
        return [
            'action' => 'query',
            'errorformat' => 'plaintext',
            'format' => 'json',
            'formatversion' => '2',
            'titles' => $this->parseTitleFromSourceUrl( $sourceUrl ),
            'prop' => 'info'
        ];
    }

    /**
     * Adds to params base the properties for getting Text Revisions
     *
     * Keys already present in $params take precedence over the defaults added here.
     *
     * @param array $params
     * @param string|null $rvContinue Continuation value from a previous response, if any
     *
     * @return array
     */
    private function addTextRevisionsToParams( array $params, ?string $rvContinue = null ): array {
        $params['prop'] .= ( $params['prop'] ) ? '|revisions' : 'revisions';

        if ( $rvContinue ) {
            $params['rvcontinue'] = $rvContinue;
        }

        return $params + [
            'rvlimit' => self::API_RESULT_LIMIT,
            'rvdir' => 'newer',
            'rvslots' => SlotRecord::MAIN,
            'rvprop' => implode(
                '|',
                [
                    'flags',
                    'timestamp',
                    'user',
                    'sha1',
                    'contentmodel',
                    'comment',
                    'content',
                    'tags',
                ]
            )
        ];
    }

    /**
     * Adds to params base the properties for getting File Revisions
     *
     * Keys already present in $params take precedence over the defaults added here.
     *
     * @param array $params
     * @param string|null $iiStart Continuation value from a previous response, if any
     *
     * @return array
     */
    private function addFileRevisionsToParams( array $params, ?string $iiStart = null ): array {
        $params['prop'] .= ( $params['prop'] ) ? '|imageinfo' : 'imageinfo';

        if ( $iiStart ) {
            $params['iistart'] = $iiStart;
        }

        return $params + [
            'iilimit' => self::API_RESULT_LIMIT,
            'iiurlwidth' => 800,
            'iiurlheight' => 400,
            'iiprop' => implode(
                '|',
                [
                    'timestamp',
                    'user',
                    'userid',
                    'comment',
                    'canonicaltitle',
                    'url',
                    'size',
                    'sha1',
                    'archivename',
                ]
            )
        ];
    }

    /**
     * Adds to params base the properties for getting Templates
     *
     * Keys already present in $params take precedence over the defaults added here.
     *
     * @param array $params
     * @param string|null $tlContinue Continuation value from a previous response, if any
     *
     * @return array
     */
    private function addTemplatesToParams( array $params, ?string $tlContinue = null ): array {
        $params['prop'] .= ( $params['prop'] ) ? '|templates' : 'templates';

        if ( $tlContinue ) {
            $params['tlcontinue'] = $tlContinue;
        }

        return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => self::API_RESULT_LIMIT ];
    }

    /**
     * Adds to params base the properties for getting Categories
     *
     * Keys already present in $params take precedence over the defaults added here.
     *
     * @param array $params
     * @param string|null $clContinue Continuation value from a previous response, if any
     *
     * @return array
     */
    private function addCategoriesToParams( array $params, ?string $clContinue = null ): array {
        $params['prop'] .= ( $params['prop'] ) ? '|categories' : 'categories';

        if ( $clContinue ) {
            $params['clcontinue'] = $clContinue;
        }

        return $params + [ 'cllimit' => self::API_RESULT_LIMIT ];
    }

}