Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
83.33% covered (warning)
83.33%
135 / 162
42.86% covered (danger)
42.86%
6 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiDetailRetriever
83.33% covered (warning)
83.33%
135 / 162
42.86% covered (danger)
42.86%
6 / 14
79.80
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
3
 sendApiRequest
66.67% covered (warning)
66.67%
4 / 6
0.00% covered (danger)
0.00%
0 / 1
2.15
 getImportDetails
97.56% covered (success)
97.56%
40 / 41
0.00% covered (danger)
0.00%
0 / 1
9
 reduceTitleList
66.67% covered (warning)
66.67%
4 / 6
0.00% covered (danger)
0.00%
0 / 1
1.04
 getMoreRevisions
79.31% covered (warning)
79.31%
23 / 29
0.00% covered (danger)
0.00%
0 / 1
9.72
 checkRevisionCount
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
5
 checkMaxRevisionAggregatedBytes
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 getFileRevisionsFromImageInfo
65.00% covered (warning)
65.00%
13 / 20
0.00% covered (danger)
0.00%
0 / 1
12.47
 getTextRevisionsFromRevisionsInfo
61.11% covered (warning)
61.11%
11 / 18
0.00% covered (danger)
0.00%
0 / 1
9.88
 getBaseParams
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 addTextRevisionsToParams
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
3
 addFileRevisionsToParams
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
3
 addTemplatesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 addCategoriesToParams
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use ConfigException;
6use FileImporter\Data\FileRevision;
7use FileImporter\Data\FileRevisions;
8use FileImporter\Data\ImportDetails;
9use FileImporter\Data\SourceUrl;
10use FileImporter\Data\TextRevision;
11use FileImporter\Data\TextRevisions;
12use FileImporter\Exceptions\HttpRequestException;
13use FileImporter\Exceptions\ImportException;
14use FileImporter\Exceptions\LocalizedImportException;
15use FileImporter\Interfaces\DetailRetriever;
16use FileImporter\Services\Http\HttpRequestExecutor;
17use MediaWiki\MediaWikiServices;
18use Psr\Log\LoggerInterface;
19use Psr\Log\NullLogger;
20use TitleValue;
21
22/**
23 * @license GPL-2.0-or-later
24 * @author Addshore
25 */
26class ApiDetailRetriever implements DetailRetriever {
27    use MediaWikiSourceUrlParser;
28
29    /**
30     * @var HttpApiLookup
     * Used to look up the API URL that belongs to a source URL.
31     */
32    private $httpApiLookup;
33
34    /**
35     * @var HttpRequestExecutor
     * Executes the requests that are sent to the source site's API.
36     */
37    private $httpRequestExecutor;
38
39    /**
40     * @var int
     * Upper limit a single file revision's "size" is compared against.
41     */
42    private $maxBytes;
43
44    /**
45     * @var LoggerInterface
     * Receives warnings about unusable API responses and exceeded limits.
46     */
47    private $logger;
48
49    /**
50     * @var string Placeholder name replacing usernames that have been suppressed as part of
51     * a steward action on the source site.
52     */
53    private $suppressedUsername;
54
55    /**
56     * @var int
     * Revision limit read from the "FileImporterMaxRevisions" configuration setting.
57     */
58    private $maxRevisions;
59
60    /**
61     * @var int
     * Limit for the summed-up "size" of all file revisions, read from the
     * "FileImporterMaxAggregatedBytes" configuration setting.
62     */
63    private $maxAggregatedBytes;
64
    /** Value sent as rvlimit, iilimit, tllimit and cllimit with the API requests. */
65    private const API_RESULT_LIMIT = 500;
    /** Fixed revision limit that is enforced in addition to the configured one. */
66    private const MAX_REVISIONS = 100;
    /** Fixed aggregated size limit that is enforced in addition to the configured one. */
67    private const MAX_AGGREGATED_BYTES = 250000000;
68
69    /**
70     * @param HttpApiLookup $httpApiLookup
71     * @param HttpRequestExecutor $httpRequestExecutor
72     * @param int $maxBytes
73     * @param LoggerInterface|null $logger
74     *
75     * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
76     */
77    public function __construct(
78        HttpApiLookup $httpApiLookup,
79        HttpRequestExecutor $httpRequestExecutor,
80        $maxBytes,
81        LoggerInterface $logger = null
82    ) {
83        $this->httpApiLookup = $httpApiLookup;
84        $this->httpRequestExecutor = $httpRequestExecutor;
85        $this->maxBytes = $maxBytes;
86        $this->logger = $logger ?: new NullLogger();
87
88        $config = MediaWikiServices::getInstance()->getMainConfig();
89
90        $this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
91        $this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
92        $this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );
93        if ( !MediaWikiServices::getInstance()->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
94            throw new ConfigException(
95                'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
96                $this->suppressedUsername . '"'
97            );
98        }
99    }
100
101    /**
102     * @param SourceUrl $sourceUrl
103     * @param array $apiParameters
104     *
105     * @return array[]
106     * @throws ImportException when the request failed
107     */
108    private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ) {
109        $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
110
111        try {
112            $imageInfoRequest = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
113        } catch ( HttpRequestException $e ) {
114            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
115                $apiUrl ], $e );
116        }
117        $requestData = json_decode( $imageInfoRequest->getContent(), true );
118        return $requestData;
119    }
120
121    /**
122     * @param SourceUrl $sourceUrl
123     *
124     * @return ImportDetails
125     * @throws ImportException e.g. when the file couldn't be found
126     */
127    public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
128        $params = $this->getBaseParams( $sourceUrl );
129        $params = $this->addFileRevisionsToParams( $params );
130        $params = $this->addTextRevisionsToParams( $params );
131        $params = $this->addTemplatesToParams( $params );
132        $params = $this->addCategoriesToParams( $params );
133
134        $requestData = $this->sendApiRequest( $sourceUrl, $params );
135
136        if ( !isset( $requestData['query'] ) || count( $requestData['query']['pages'] ) !== 1 ) {
137            $this->logger->warning(
138                'No pages returned by the API',
139                [
140                    'sourceUrl' => $sourceUrl->getUrl(),
141                    'apiParameters' => $params,
142                ]
143            );
144            throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
145        }
146
147        /** @var array $pageInfoData */
148        $pageInfoData = end( $requestData['query']['pages'] );
149        '@phan-var array $pageInfoData';
150
151        if ( array_key_exists( 'missing', $pageInfoData ) ) {
152            if (
153                array_key_exists( 'imagerepository', $pageInfoData ) &&
154                $pageInfoData['imagerepository'] == 'shared'
155            ) {
156                throw new LocalizedImportException(
157                    [ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
158                );
159            }
160            throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
161        }
162
163        if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
164            $this->logger->warning(
165                'Bad image or revision info returned by the API',
166                [
167                    'sourceUrl' => $sourceUrl->getUrl(),
168                    'apiParameters' => $params,
169                ]
170            );
171            throw new LocalizedImportException( 'fileimporter-api-badinfo' );
172        }
173
174        // FIXME: Isn't this misplaced here, *before* more revisions are fetched?
175        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
176        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
177
178        while ( array_key_exists( 'continue', $requestData ) ) {
179            $this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
180        }
181
182        $pageTitle = $pageInfoData['title'];
183        $pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;
184
185        $imageInfoData = $pageInfoData['imageinfo'];
186        $revisionsData = $pageInfoData['revisions'];
187        $fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
188        $textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
189        $templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
190        $categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );
191
192        $splitTitle = explode( ':', $pageInfoData['title'] );
193        $titleAfterColon = end( $splitTitle );
194
195        $importDetails = new ImportDetails(
196            $sourceUrl,
197            new TitleValue( NS_FILE, $titleAfterColon ),
198            $textRevisions,
199            $fileRevisions
200        );
201        // FIXME: Better use constructor parameters instead of setters?
202        $importDetails->setPageLanguage( $pageLanguage );
203        $importDetails->setTemplates( $templates );
204        $importDetails->setCategories( $categories );
205
206        return $importDetails;
207    }
208
209    /**
210     * @param array[] $titles
211     * @param int $namespace
212     *
213     * @return string[]
214     */
215    private function reduceTitleList( array $titles, $namespace ) {
216        return array_map(
217            static function ( array $title ) {
218                return $title['title'];
219            },
220            array_filter(
221                $titles,
222                static function ( array $title ) use ( $namespace ) {
223                    return $title['ns'] === $namespace;
224                }
225            )
226        );
227    }
228
229    /**
230     * Fetches the next set of revisions unless the number of revisions
231     * exceeds the max revisions limit
232     *
233     * @param SourceUrl $sourceUrl
234     * @param array[] &$requestData
235     * @param array[] &$pageInfoData
236     *
237     * @throws ImportException
238     */
239    private function getMoreRevisions(
240        SourceUrl $sourceUrl,
241        array &$requestData,
242        array &$pageInfoData
243    ) {
244        $rvContinue = $requestData['continue']['rvcontinue'] ?? null;
245        $iiStart = $requestData['continue']['iistart'] ?? null;
246        $tlContinue = $requestData['continue']['tlcontinue'] ?? null;
247        $clContinue = $requestData['continue']['clcontinue'] ?? null;
248
249        $params = $this->getBaseParams( $sourceUrl );
250
251        if ( $iiStart ) {
252            $params = $this->addFileRevisionsToParams( $params, $iiStart );
253        }
254
255        if ( $rvContinue ) {
256            $params = $this->addTextRevisionsToParams( $params, $rvContinue );
257        }
258
259        if ( $tlContinue ) {
260            $params = $this->addTemplatesToParams( $params, $tlContinue );
261        }
262
263        if ( $clContinue ) {
264            $params = $this->addCategoriesToParams( $params, $clContinue );
265        }
266
267        $requestData = $this->sendApiRequest( $sourceUrl, $params );
268
269        $newPageInfoData = end( $requestData['query']['pages'] );
270
271        if ( array_key_exists( 'revisions', $newPageInfoData ) ) {
272            $pageInfoData['revisions'] =
273                array_merge( $pageInfoData['revisions'], $newPageInfoData['revisions'] );
274        }
275
276        if ( array_key_exists( 'imageinfo', $newPageInfoData ) ) {
277            $pageInfoData['imageinfo'] =
278                array_merge( $pageInfoData['imageinfo'], $newPageInfoData['imageinfo'] );
279        }
280
281        if ( array_key_exists( 'templates', $newPageInfoData ) ) {
282            $pageInfoData['templates'] =
283                array_merge( $pageInfoData['templates'], $newPageInfoData['templates'] );
284        }
285
286        if ( array_key_exists( 'categories', $newPageInfoData ) ) {
287            $pageInfoData['categories'] =
288                array_merge( $pageInfoData['categories'], $newPageInfoData['categories'] );
289        }
290
291        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
292        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
293    }
294
295    /**
296     * Throws an exception if the number of revisions to be imported exceeds
297     * the maximum revision limit
298     *
299     * @param SourceUrl $sourceUrl
300     * @param array[] $pageInfoData
301     *
302     * @throws ImportException when exceeding the acceptable maximum
303     */
304    private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ) {
305        if ( count( $pageInfoData['revisions'] ) > $this->maxRevisions ||
306            count( $pageInfoData['imageinfo'] ) > $this->maxRevisions ||
307            count( $pageInfoData['revisions'] ) > static::MAX_REVISIONS ||
308            count( $pageInfoData['imageinfo'] ) > static::MAX_REVISIONS ) {
309            $this->logger->warning(
310                'Too many revisions were being fetched',
311                [
312                    'sourceUrl' => $sourceUrl->getUrl(),
313                ]
314            );
315
316            throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
317        }
318    }
319
320    /**
321     * @param array[] $pageInfoData
322     * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
323     *
324     * @throws ImportException when exceeding the maximum file size
325     */
326    private function checkMaxRevisionAggregatedBytes( array $pageInfoData ) {
327        $aggregatedFileBytes = 0;
328        foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
329            $aggregatedFileBytes += $fileVersion['size'] ?? 0;
330            if ( $aggregatedFileBytes > $this->maxAggregatedBytes ||
331                $aggregatedFileBytes > static::MAX_AGGREGATED_BYTES ) {
332                $versions = count( $pageInfoData['imageinfo'] );
333                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
334            }
335        }
336    }
337
338    /**
339     * @param array[] $imageInfo
340     * @param string $pageTitle
341     *
342     * @return FileRevisions
343     * @throws ImportException when the file is not acceptable, e.g. hidden or to big
344     */
345    private function getFileRevisionsFromImageInfo( array $imageInfo, $pageTitle ) {
346        $revisions = [];
347        foreach ( $imageInfo as $revisionInfo ) {
348            if ( array_key_exists( 'filehidden', $revisionInfo ) ) {
349                throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
350            }
351
352            if ( array_key_exists( 'filemissing', $revisionInfo ) ) {
353                throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
354            }
355
356            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
357                $revisionInfo['user'] = $this->suppressedUsername;
358            }
359
360            if ( isset( $revisionInfo['size'] ) && $revisionInfo['size'] > $this->maxBytes ) {
361                $versions = count( $imageInfo );
362                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
363            }
364
365            if ( isset( $revisionInfo['sha1'] ) ) {
366                // Convert from API sha1 format to DB sha1 format. The conversion can be se inside
367                // ApiQueryImageInfo.
368                // * API sha1 format is base 16 padded to 40 chars
369                // * DB sha1 format is base 36 padded to 31 chars
370                $revisionInfo['sha1'] = \Wikimedia\base_convert( $revisionInfo['sha1'], 16, 36, 31 );
371            }
372
373            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
374                $revisionInfo['comment'] = wfMessage( 'fileimporter-revision-removed-comment' )
375                    ->plain();
376            }
377
378            $revisionInfo['name'] = $pageTitle;
379            $revisionInfo['description'] = $revisionInfo['comment'] ?? null;
380
381            $revisions[] = new FileRevision( $revisionInfo );
382        }
383        return new FileRevisions( $revisions );
384    }
385
386    /**
387     * @param array[] $revisionsInfo
388     * @param string $pageTitle
389     *
390     * @return TextRevisions
391     */
392    private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, $pageTitle ) {
393        $revisions = [];
394        foreach ( $revisionsInfo as $revisionInfo ) {
395            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
396                $revisionInfo['user'] = $this->suppressedUsername;
397            }
398
399            if ( array_key_exists( 'texthidden', $revisionInfo ) ) {
400                $revisionInfo['*'] = wfMessage( 'fileimporter-revision-removed-text' )
401                    ->plain();
402            }
403
404            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
405                $revisionInfo['comment'] = wfMessage( 'fileimporter-revision-removed-comment' )
406                    ->plain();
407            }
408
409            if ( !array_key_exists( 'contentmodel', $revisionInfo ) ) {
410                $revisionInfo['contentmodel'] = CONTENT_MODEL_WIKITEXT;
411            }
412
413            if ( !array_key_exists( 'contentformat', $revisionInfo ) ) {
414                $revisionInfo['contentformat'] = CONTENT_FORMAT_WIKITEXT;
415            }
416
417            $revisionInfo['minor'] = array_key_exists( 'minor', $revisionInfo );
418            $revisionInfo['title'] = $pageTitle;
419            $revisions[] = new TextRevision( $revisionInfo );
420        }
421        return new TextRevisions( $revisions );
422    }
423
424    /**
425     * @param SourceUrl $sourceUrl
426     * @return string[]
427     */
428    private function getBaseParams( SourceUrl $sourceUrl ) {
429        return [
430            'action' => 'query',
431            'format' => 'json',
432            'titles' => $this->parseTitleFromSourceUrl( $sourceUrl ),
433            'prop' => 'info'
434        ];
435    }
436
437    /**
438     * Adds to params base the properties for getting Text Revisions
439     *
440     * @param array $params
441     * @param string|null $rvContinue
442     *
443     * @return array
444     */
445    private function addTextRevisionsToParams( array $params, $rvContinue = null ) {
446        $params['prop'] .= ( $params['prop'] ) ? '|revisions' : 'revisions';
447
448        if ( $rvContinue ) {
449            $params['rvcontinue'] = $rvContinue;
450        }
451
452        return $params + [
453            'rvlimit' => static::API_RESULT_LIMIT,
454            'rvdir' => 'newer',
455            'rvprop' => implode(
456                '|',
457                [
458                    'flags',
459                    'timestamp',
460                    'user',
461                    'sha1',
462                    'contentmodel',
463                    'comment',
464                    'content',
465                    'tags',
466                ]
467            )
468        ];
469    }
470
471    /**
472     * Adds to params base the properties for getting File Revisions
473     *
474     * @param array $params
475     * @param string|null $iiStart
476     *
477     * @return array
478     */
479    private function addFileRevisionsToParams( array $params, $iiStart = null ) {
480        $params['prop'] .= ( $params['prop'] ) ? '|imageinfo' : 'imageinfo';
481
482        if ( $iiStart ) {
483            $params['iistart'] = $iiStart;
484        }
485
486        return $params + [
487            'iilimit' => static::API_RESULT_LIMIT,
488            'iiurlwidth' => 800,
489            'iiurlheight' => 400,
490            'iiprop' => implode(
491                '|',
492                [
493                    'timestamp',
494                    'user',
495                    'userid',
496                    'comment',
497                    'canonicaltitle',
498                    'url',
499                    'size',
500                    'sha1',
501                    'archivename',
502                ]
503            )
504        ];
505    }
506
507    /**
508     * Adds to params base the properties for getting Templates
509     *
510     * @param array $params
511     * @param string|null $tlContinue
512     *
513     * @return array
514     */
515    private function addTemplatesToParams( array $params, $tlContinue = null ) {
516        $params['prop'] .= ( $params['prop'] ) ? '|templates' : 'templates';
517
518        if ( $tlContinue ) {
519            $params['tlcontinue'] = $tlContinue;
520        }
521
522        return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => static::API_RESULT_LIMIT ];
523    }
524
525    /**
526     * Adds to params base the properties for getting Categories
527     *
528     * @param array $params
529     * @param string|null $clContinue
530     *
531     * @return array
532     */
533    private function addCategoriesToParams( array $params, $clContinue = null ) {
534        $params['prop'] .= ( $params['prop'] ) ? '|categories' : 'categories';
535
536        if ( $clContinue ) {
537            $params['clcontinue'] = $clContinue;
538        }
539
540        return $params + [ 'cllimit' => static::API_RESULT_LIMIT ];
541    }
542
543}