Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
0.00% covered (danger)
0.00%
0 / 1
43.75% covered (danger)
43.75%
7 / 16
CRAP
83.94% covered (warning)
83.94%
162 / 193
FileImporter\Remote\MediaWiki\ApiDetailRetriever
0.00% covered (danger)
0.00%
0 / 1
43.75% covered (danger)
43.75%
7 / 16
79.45
83.94% covered (warning)
83.94%
162 / 193
 __construct
100.00% covered (success)
100.00%
1 / 1
2
100.00% covered (success)
100.00%
12 / 12
 sendApiRequest
0.00% covered (danger)
0.00%
0 / 1
2.31
57.14% covered (warning)
57.14%
4 / 7
 getImportDetails
0.00% covered (danger)
0.00%
0 / 1
8
97.87% covered (success)
97.87%
46 / 47
 reduceTitleList
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
1 / 1
 anonymousFunction:213#1201
0.00% covered (danger)
0.00%
0 / 1
1.12
50.00% covered (danger)
50.00%
1 / 2
 anonymousFunction:218#1230
0.00% covered (danger)
0.00%
0 / 1
1.12
50.00% covered (danger)
50.00%
1 / 2
 getMoreRevisions
0.00% covered (danger)
0.00%
0 / 1
9.65
80.00% covered (warning)
80.00%
24 / 30
 checkRevisionCount
100.00% covered (success)
100.00%
1 / 1
5
100.00% covered (success)
100.00%
9 / 9
 checkMaxRevisionAggregatedBytes
100.00% covered (success)
100.00%
1 / 1
4
100.00% covered (success)
100.00%
8 / 8
 getFileRevisionsFromImageInfo
0.00% covered (danger)
0.00%
0 / 1
12.41
65.22% covered (warning)
65.22%
15 / 23
 getTextRevisionsFromRevisionsInfo
0.00% covered (danger)
0.00%
0 / 1
13.04
57.14% covered (warning)
57.14%
12 / 21
 getBaseParams
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
4 / 4
 addTextRevisionsToParams
100.00% covered (success)
100.00%
1 / 1
3
100.00% covered (success)
100.00%
8 / 8
 addFileRevisionsToParams
100.00% covered (success)
100.00%
1 / 1
3
100.00% covered (success)
100.00%
9 / 9
 addTemplatesToParams
0.00% covered (danger)
0.00%
0 / 1
3.14
75.00% covered (warning)
75.00%
3 / 4
 addCategoriesToParams
0.00% covered (danger)
0.00%
0 / 1
3.14
75.00% covered (warning)
75.00%
3 / 4
<?php
namespace FileImporter\Remote\MediaWiki;
use FileImporter\Data\FileRevision;
use FileImporter\Data\FileRevisions;
use FileImporter\Data\ImportDetails;
use FileImporter\Data\SourceUrl;
use FileImporter\Data\TextRevision;
use FileImporter\Data\TextRevisions;
use FileImporter\Exceptions\HttpRequestException;
use FileImporter\Exceptions\ImportException;
use FileImporter\Exceptions\LocalizedImportException;
use FileImporter\Interfaces\DetailRetriever;
use FileImporter\Services\Http\HttpRequestExecutor;
use Psr\Log\LoggerInterface;
use Title;
use MediaWiki\MediaWikiServices;
use ConfigException;
use User;
/**
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class ApiDetailRetriever implements DetailRetriever {
    use MediaWikiSourceUrlParser;
    /**
     * @var HttpApiLookup
     */
    private $httpApiLookup;
    /**
     * @var HttpRequestExecutor
     */
    private $httpRequestExecutor;
    /**
     * @var LoggerInterface
     */
    private $logger;
    /**
     * @var int
     */
    private $maxBytes;
    /**
     * @var string Placeholder name replacing usernames that have been suppressed as part of
     * a steward action on the source site.
     */
    private $suppressedUsername;
    /**
     * @var int
     */
    private $maxRevisions;
    /**
     * @var int
     */
    private $maxAggregatedBytes;
    const API_RESULT_LIMIT = 500;
    const MAX_REVISIONS = 100;
    const MAX_AGGREGATED_BYTES = 250000000;
    /**
     * @param HttpApiLookup $httpApiLookup
     * @param HttpRequestExecutor $httpRequestExecutor
     * @param LoggerInterface $logger
     * @param int $maxBytes
     *
     * @throws ConfigException
     */
    public function __construct(
        HttpApiLookup $httpApiLookup,
        HttpRequestExecutor $httpRequestExecutor,
        LoggerInterface $logger,
        $maxBytes
    ) {
        $this->httpApiLookup = $httpApiLookup;
        $this->httpRequestExecutor = $httpRequestExecutor;
        $this->logger = $logger;
        $this->maxBytes = $maxBytes;
        $config = MediaWikiServices::getInstance()->getMainConfig();
        $this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
        $this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
        $this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );
        if ( !User::isValidUserName( $this->suppressedUsername ) ) {
            throw new ConfigException(
                'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
                $this->suppressedUsername . '"'
            );
        }
    }
    /**
     * @param SourceUrl $sourceUrl
     * @param array $apiParameters
     *
     * @return array[]
     * @throws ImportException
     */
    private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ) {
        $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
        try {
            $imageInfoRequest = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
        } catch ( HttpRequestException $e ) {
            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
                $apiUrl ] );
        }
        $requestData = json_decode( $imageInfoRequest->getContent(), true );
        return $requestData;
    }
    /**
     * @param SourceUrl $sourceUrl
     *
     * @return ImportDetails
     * @throws ImportException
     */
    public function getImportDetails( SourceUrl $sourceUrl ) {
        $params = $this->getBaseParams( $sourceUrl );
        $params = $this->addFileRevisionsToParams( $params );
        $params = $this->addTextRevisionsToParams( $params );
        $params = $this->addTemplatesToParams( $params );
        $params = $this->addCategoriesToParams( $params );
        $requestData = $this->sendApiRequest( $sourceUrl, $params );
        if ( count( $requestData['query']['pages'] ) !== 1 ) {
            $this->logger->warning(
                'No pages returned by the API',
                [
                    'sourceUrl' => $sourceUrl->getUrl(),
                    'apiParameters' => $params,
                ]
            );
            throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
        }
        $pageInfoData = end( $requestData['query']['pages'] );
        if ( array_key_exists( 'missing', $pageInfoData ) ) {
            if (
                array_key_exists( 'imagerepository', $pageInfoData ) &&
                $pageInfoData['imagerepository'] == 'shared'
            ) {
                throw new LocalizedImportException( 'fileimporter-cantimportfromsharedrepo' );
            }
            throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
        }
        if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
            $this->logger->warning(
                'Bad image or revision info returned by the API',
                [
                    'sourceUrl' => $sourceUrl->getUrl(),
                    'apiParameters' => $params,
                ]
            );
            throw new LocalizedImportException( 'fileimporter-api-badinfo' );
        }
        // FIXME: Isn't this misplaced here, *before* more revisions are fetched?
        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
        while ( array_key_exists( 'continue', $requestData ) ) {
            $this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
        }
        $pageTitle = $pageInfoData['title'];
        $pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;
        $imageInfoData = $pageInfoData['imageinfo'];
        $revisionsData = $pageInfoData['revisions'];
        $fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
        $textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
        $templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
        $categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );
        $splitTitle = explode( ':', $pageInfoData['title'] );
        $titleAfterColon = end( $splitTitle );
        $importDetails = new ImportDetails(
            $sourceUrl,
            Title::makeTitleSafe( NS_FILE, $titleAfterColon ),
            $textRevisions,
            $fileRevisions
        );
        // FIXME: Better use constructor parameters instead of setters?
        $importDetails->setPageLanguage( $pageLanguage );
        $importDetails->setTemplates( $templates );
        $importDetails->setCategories( $categories );
        return $importDetails;
    }
    /**
     * @param array[] $titles
     * @param int $namespace
     *
     * @return string[]
     */
    private function reduceTitleList( array $titles, $namespace ) {
        return array_map(
            function ( array $title ) {
                return $title['title'];
            },
            array_filter(
                $titles,
                function ( array $title ) use ( $namespace ) {
                    return $title['ns'] === $namespace;
                }
            )
        );
    }
    /**
     * Fetches the next set of revisions unless the number of revisions
     * exceeds the max revisions limit
     *
     * @param SourceUrl $sourceUrl
     * @param array[] &$requestData
     * @param array[] &$pageInfoData
     *
     * @throws ImportException
     */
    private function getMoreRevisions(
        SourceUrl $sourceUrl,
        array &$requestData,
        array &$pageInfoData
    ) {
        $rvContinue = $requestData['continue']['rvcontinue'] ?? null;
        $iiStart = $requestData['continue']['iistart'] ?? null;
        $tlContinue = $requestData['continue']['tlcontinue'] ?? null;
        $clContinue = $requestData['continue']['clcontinue'] ?? null;
        $params = $this->getBaseParams( $sourceUrl );
        if ( $iiStart ) {
            $params = $this->addFileRevisionsToParams( $params, $iiStart );
        }
        if ( $rvContinue ) {
            $params = $this->addTextRevisionsToParams( $params, $rvContinue );
        }
        if ( $tlContinue ) {
            $params = $this->addTemplatesToParams( $params, $tlContinue );
        }
        if ( $clContinue ) {
            $params = $this->addCategoriesToParams( $params, $clContinue );
        }
        $requestData = $this->sendApiRequest( $sourceUrl, $params );
        $newPageInfoData = end( $requestData['query']['pages'] );
        if ( array_key_exists( 'revisions', $newPageInfoData ) ) {
            $pageInfoData['revisions'] =
                array_merge( $pageInfoData['revisions'], $newPageInfoData['revisions'] );
        }
        if ( array_key_exists( 'imageinfo', $newPageInfoData ) ) {
            $pageInfoData['imageinfo'] =
                array_merge( $pageInfoData['imageinfo'], $newPageInfoData['imageinfo'] );
        }
        if ( array_key_exists( 'templates', $newPageInfoData ) ) {
            $pageInfoData['templates'] =
                array_merge( $pageInfoData['templates'], $newPageInfoData['templates'] );
        }
        if ( array_key_exists( 'categories', $newPageInfoData ) ) {
            $pageInfoData['categories'] =
                array_merge( $pageInfoData['categories'], $newPageInfoData['categories'] );
        }
        $this->checkRevisionCount( $sourceUrl, $pageInfoData );
        $this->checkMaxRevisionAggregatedBytes( $pageInfoData );
    }
    /**
     * Throws an exception if the number of revisions to be imported exceeds
     * the maximum revision limit
     *
     * @param SourceUrl $sourceUrl
     * @param array[] $pageInfoData
     *
     * @throws LocalizedImportException
     */
    private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ) {
        if ( count( $pageInfoData['revisions'] ) > $this->maxRevisions ||
            count( $pageInfoData['imageinfo'] ) > $this->maxRevisions ||
            count( $pageInfoData['revisions'] ) > static::MAX_REVISIONS ||
            count( $pageInfoData['imageinfo'] ) > static::MAX_REVISIONS ) {
            $this->logger->warning(
                'Too many revisions were being fetched',
                [
                    'sourceUrl' => $sourceUrl->getUrl(),
                ]
            );
            throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
        }
    }
    private function checkMaxRevisionAggregatedBytes( $pageInfoData ) {
        $aggregatedFileBytes = 0;
        foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
            $aggregatedFileBytes += $fileVersion['size'];
            if ( $aggregatedFileBytes > $this->maxAggregatedBytes ||
                $aggregatedFileBytes > static::MAX_AGGREGATED_BYTES ) {
                $versions = count( $pageInfoData['imageinfo'] );
                throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
            }
        }
    }
    /**
     * @param array[] $imageInfo
     * @param string $pageTitle
     *
     * @return FileRevisions
     */
    private function getFileRevisionsFromImageInfo( array $imageInfo, $pageTitle ) {
        $revisions = [];
        foreach ( $imageInfo as $revisionInfo ) {
            if ( array_key_exists( 'filehidden', $revisionInfo ) ) {
                throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
            }
            if ( array_key_exists( 'filemissing', $revisionInfo ) ) {
                throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
            }
            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
                $revisionInfo['user'] = $this->suppressedUsername;
            }
            if ( array_key_exists( 'sha1hidden', $revisionInfo ) ) {
                $revisionInfo['sha1'] = sha1( $revisionInfo['*'] );
            }
            if ( array_key_exists( 'size', $revisionInfo ) ) {
                if ( $revisionInfo['size'] > $this->maxBytes ) {
                    $versions = count( $imageInfo );
                    throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
                }
            }
            /**
             * Convert from API sha1 format to DB sha1 format.
             * The conversion can be se inside ApiQueryImageInfo.
             *  - API sha1 format is base 16 padded to 40 chars
             *  - DB sha1 format is base 36 padded to 31 chars
             */
            $revisionInfo['sha1'] = \Wikimedia\base_convert( $revisionInfo['sha1'], 16, 36, 31 );
            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
                $revisionInfo['comment'] = wfMessage( 'fileimporter-revision-removed-comment' )
                    ->plain();
            }
            $revisionInfo['bits'] = $revisionInfo['size'];
            $revisionInfo['name'] = $pageTitle;
            $revisionInfo['description'] = $revisionInfo['comment'];
            $revisions[] = new FileRevision( $revisionInfo );
        }
        return new FileRevisions( $revisions );
    }
    /**
     * @param array[] $revisionsInfo
     * @param string $pageTitle
     *
     * @return TextRevisions
     */
    private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, $pageTitle ) {
        $revisions = [];
        foreach ( $revisionsInfo as $revisionInfo ) {
            if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
                $revisionInfo['user'] = $this->suppressedUsername;
            }
            if ( array_key_exists( 'texthidden', $revisionInfo ) ) {
                $revisionInfo['*'] = wfMessage( 'fileimporter-revision-removed-text' )
                    ->plain();
            }
            if ( array_key_exists( 'sha1hidden', $revisionInfo ) ) {
                $revisionInfo['sha1'] = \Wikimedia\base_convert(
                    sha1( $revisionInfo['*'] ), 16, 36, 31
                );
            }
            if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
                $revisionInfo['comment'] = wfMessage( 'fileimporter-revision-removed-comment' )
                    ->plain();
            }
            if ( !array_key_exists( 'contentmodel', $revisionInfo ) ) {
                $revisionInfo['contentmodel'] = CONTENT_MODEL_WIKITEXT;
            }
            if ( !array_key_exists( 'contentformat', $revisionInfo ) ) {
                $revisionInfo['contentformat'] = CONTENT_FORMAT_WIKITEXT;
            }
            $revisionInfo['minor'] = array_key_exists( 'minor', $revisionInfo );
            $revisionInfo['title'] = $pageTitle;
            $revisions[] = new TextRevision( $revisionInfo );
        }
        return new TextRevisions( $revisions );
    }
    /**
     * @param SourceUrl $sourceUrl
     * @return string[]
     */
    private function getBaseParams( SourceUrl $sourceUrl ) {
        return [
            'action' => 'query',
            'format' => 'json',
            'titles' => $this->parseTitleFromSourceUrl( $sourceUrl ),
            'prop' => 'info'
        ];
    }
    /**
     * Adds to params base the properties for getting Text Revisions
     *
     * @param array $params
     * @param string|null $rvContinue
     *
     * @return array
     */
    private function addTextRevisionsToParams( array $params, $rvContinue = null ) {
        $params['prop'] .= ( $params['prop'] ) ? '|revisions' : 'revisions';
        if ( $rvContinue ) {
            $params['rvcontinue'] = $rvContinue;
        }
        return $params + [
            'rvlimit' => static::API_RESULT_LIMIT,
            'rvdir' => 'newer',
            'rvprop' => implode(
                '|',
                [
                    'flags',
                    'timestamp',
                    'user',
                    'sha1',
                    'contentmodel',
                    'comment',
                    'content',
                    'tags',
                ]
            )
        ];
    }
    /**
     * Adds to params base the properties for getting File Revisions
     *
     * @param array $params
     * @param string|null $iiStart
     *
     * @return array
     */
    private function addFileRevisionsToParams( array $params, $iiStart = null ) {
        $params['prop'] .= ( $params['prop'] ) ? '|imageinfo' : 'imageinfo';
        if ( $iiStart ) {
            $params['iistart'] = $iiStart;
        }
        return $params + [
            'iilimit' => static::API_RESULT_LIMIT,
            'iiurlwidth' => 800,
            'iiurlheight' => 400,
            'iiprop' => implode(
                '|',
                [
                    'timestamp',
                    'user',
                    'userid',
                    'comment',
                    'canonicaltitle',
                    'url',
                    'size',
                    'sha1',
                ]
            )
        ];
    }
    /**
     * Adds to params base the properties for getting Templates
     *
     * @param array $params
     * @param string|null $tlContinue
     *
     * @return array
     */
    private function addTemplatesToParams( array $params, $tlContinue = null ) {
        $params['prop'] .= ( $params['prop'] ) ? '|templates' : 'templates';
        if ( $tlContinue ) {
            $params['tlcontinue'] = $tlContinue;
        }
        return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => static::API_RESULT_LIMIT ];
    }
    /**
     * Adds to params base the properties for getting Categories
     *
     * @param array $params
     * @param string|null $clContinue
     *
     * @return array
     */
    private function addCategoriesToParams( array $params, $clContinue = null ) {
        $params['prop'] .= ( $params['prop'] ) ? '|categories' : 'categories';
        if ( $clContinue ) {
            $params['clcontinue'] = $clContinue;
        }
        return $params + [ 'cllimit' => static::API_RESULT_LIMIT ];
    }
}