Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
92.00% covered (success)
92.00%
46 / 50
75.00% covered (warning)
75.00%
6 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
CommonsHelperConfigRetriever
92.00% covered (success)
92.00%
46 / 50
75.00% covered (warning)
75.00%
6 / 8
16.13
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 retrieveConfiguration
93.33% covered (success)
93.33%
14 / 15
0.00% covered (danger)
0.00%
0 / 1
7.01
 getConfigWikitext
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getConfigWikiUrl
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 buildCommonsHelperConfigUrl
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 sendApiRequest
84.21% covered (warning)
84.21%
16 / 19
0.00% covered (danger)
0.00%
0 / 1
2.02
 getQueryParamTitle
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 getHostWithoutTopLevelDomain
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use FileImporter\Data\SourceUrl;
6use FileImporter\Exceptions\HttpRequestException;
7use FileImporter\Exceptions\ImportException;
8use FileImporter\Exceptions\LocalizedImportException;
9use FileImporter\Services\Http\HttpRequestExecutor;
10use MediaWiki\Config\Config;
11use MediaWiki\MainConfigNames;
12use MediaWiki\MediaWikiServices;
13use MediaWiki\Revision\SlotRecord;
14
/**
 * Fetches the CommonsHelper configuration page that belongs to the host of a source URL from
 * the configured wiki via the action API, and keeps the page's wikitext and URL for later use.
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigRetriever {

    /** @var Config Main configuration of the local wiki, used for ArticlePath and ScriptPath */
    private $mainConfig;
    /** @var HttpRequestExecutor */
    private $httpRequestExecutor;
    /** @var string Full domain including schema of the wiki holding the config pages */
    private $configServer;
    /** @var string Page name prefix the host-specific suffix is appended to */
    private $configBasePageName;
    /** @var string|null Null until retrieveConfiguration() succeeded */
    private $configWikitext = null;
    /** @var string|null Null until retrieveConfiguration() succeeded */
    private $configWikiUrl = null;

    /**
     * @param HttpRequestExecutor $httpRequestExecutor
     * @param string $configServer Full domain including schema, e.g. "https://www.mediawiki.org"
     * @param string $configBasePageName Base page name, e.g. "Extension:FileImporter/Data/"
     */
    public function __construct(
        HttpRequestExecutor $httpRequestExecutor,
        string $configServer,
        string $configBasePageName
    ) {
        // TODO: Inject?
        $this->mainConfig = MediaWikiServices::getInstance()->getMainConfig();

        $this->httpRequestExecutor = $httpRequestExecutor;
        $this->configServer = $configServer;
        $this->configBasePageName = $configBasePageName;
    }

    /**
     * Requests the configuration page for the given source URL and, on success, stores its
     * wikitext and URL so they can be read via getConfigWikitext() and getConfigWikiUrl().
     *
     * @param SourceUrl $sourceUrl
     *
     * @return bool True if a config was found. False if the response does not contain exactly
     *  one page, or if that page is marked as missing.
     * @throws ImportException when the request failed, or when the page exists but the response
     *  does not contain the main slot content of a revision
     */
    public function retrieveConfiguration( SourceUrl $sourceUrl ): bool {
        $response = $this->sendApiRequest( $sourceUrl );

        $pages = $response['query']['pages'] ?? [];
        if ( !is_array( $pages ) || count( $pages ) !== 1 ) {
            return false;
        }

        $currPage = end( $pages );
        if ( !is_array( $currPage ) ) {
            // A page entry that is not an array cannot be inspected any further
            throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
        }

        if ( array_key_exists( 'missing', $currPage ) ) {
            return false;
        }

        // end() returns false for an empty list, so every level is checked before it is
        // dereferenced. Otherwise a malformed response would surface as a TypeError instead of
        // the documented ImportException.
        $revisions = $currPage['revisions'] ?? null;
        $latestRevision = is_array( $revisions ) ? end( $revisions ) : null;
        $content = is_array( $latestRevision )
            ? ( $latestRevision['slots'][SlotRecord::MAIN]['content'] ?? null )
            : null;

        if ( is_string( $content ) ) {
            $this->configWikiUrl = $this->buildCommonsHelperConfigUrl( $sourceUrl );
            $this->configWikitext = $content;
            return true;
        }

        throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
    }

    /**
     * @return string|null Wikitext stored by the last successful retrieveConfiguration() call,
     *  null if there was none
     */
    public function getConfigWikitext() {
        return $this->configWikitext;
    }

    /**
     * @return string|null URL stored by the last successful retrieveConfiguration() call, null
     *  if there was none
     */
    public function getConfigWikiUrl() {
        return $this->configWikiUrl;
    }

    /**
     * @param SourceUrl $sourceUrl
     *
     * @return string The config server followed by the local ArticlePath, with "$1" replaced by
     *  the title of the configuration page
     */
    private function buildCommonsHelperConfigUrl( SourceUrl $sourceUrl ): string {
        $title = $this->getQueryParamTitle( $sourceUrl );

        // We assume the wiki holding the config pages uses the same configuration.
        $articlePath = str_replace( '$1', $title, $this->mainConfig->get( MainConfigNames::ArticlePath ) );

        return $this->configServer . $articlePath;
    }

    /**
     * Queries the config server's api.php for the content of the main slot of one revision of
     * the configuration page.
     *
     * @param SourceUrl $sourceUrl
     *
     * @return array[] Decoded JSON response
     * @throws ImportException when the request failed or the response body is not a JSON
     *  object or array
     */
    private function sendApiRequest( SourceUrl $sourceUrl ): array {
        // We assume the wiki holding the config pages uses the same configuration.
        $scriptPath = $this->mainConfig->get( MainConfigNames::ScriptPath );
        $apiUrl = $this->configServer . $scriptPath . '/api.php';
        $apiParameters = [
            'action' => 'query',
            'errorformat' => 'plaintext',
            'format' => 'json',
            'titles' => $this->getQueryParamTitle( $sourceUrl ),
            'prop' => 'revisions',
            'formatversion' => 2,
            'rvprop' => 'content',
            'rvlimit' => 1,
            'rvslots' => SlotRecord::MAIN,
            'rvdir' => 'older'
        ];

        try {
            $request = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
        } catch ( HttpRequestException $e ) {
            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
                $apiUrl ], $e );
        }

        $response = json_decode( $request->getContent(), true );
        if ( !is_array( $response ) ) {
            // json_decode() returns null for malformed JSON and a scalar for a scalar document.
            // Returning either would violate the declared return type, so report it the same
            // way as a failed request.
            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
                $apiUrl ] );
        }

        return $response;
    }

    /**
     * @param SourceUrl $sourceUrl
     *
     * @return string The base page name with spaces replaced by underscores, followed by the
     *  host without its top-level domain, e.g. "Extension:FileImporter/Data/en.wikipedia"
     */
    private function getQueryParamTitle( SourceUrl $sourceUrl ): string {
        $domain = $this->getHostWithoutTopLevelDomain( $sourceUrl );

        if ( ctype_alpha( $domain ) ) {
            // Default to "www.mediawiki", even when the source URL was "https://mediawiki.org/…"
            $domain = 'www.' . $domain;
        }

        return str_replace( ' ', '_', $this->configBasePageName ) . $domain;
    }

    /**
     * @param SourceUrl $sourceUrl
     *
     * @return string Full host with all subdomains, but without the top-level domain (if a
     *  top-level domain was given), e.g. "en.wikipedia".
     */
    private function getHostWithoutTopLevelDomain( SourceUrl $sourceUrl ): string {
        $domain = $sourceUrl->getHost();

        // Reuse the original configuration pages for test imports from the Beta cluster
        $domain = str_replace( '.beta.wmflabs.org', '.org', $domain );

        // Strips the last dot-separated label only, e.g. "en.wikipedia.org" => "en.wikipedia"
        return preg_replace( '/\.\w+$/', '', $domain );
    }

}