Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
92.00% covered (success)
92.00%
46 / 50
75.00% covered (warning)
75.00%
6 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
CommonsHelperConfigRetriever
92.00% covered (success)
92.00%
46 / 50
75.00% covered (warning)
75.00%
6 / 8
16.13
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 retrieveConfiguration
93.33% covered (success)
93.33%
14 / 15
0.00% covered (danger)
0.00%
0 / 1
7.01
 getConfigWikitext
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getConfigWikiUrl
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 buildCommonsHelperConfigUrl
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 sendApiRequest
84.21% covered (warning)
84.21%
16 / 19
0.00% covered (danger)
0.00%
0 / 1
2.02
 getQueryParamTitle
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 getHostWithoutTopLevelDomain
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use FileImporter\Data\SourceUrl;
6use FileImporter\Exceptions\HttpRequestException;
7use FileImporter\Exceptions\ImportException;
8use FileImporter\Exceptions\LocalizedImportException;
9use FileImporter\Services\Http\HttpRequestExecutor;
10use MediaWiki\Config\Config;
11use MediaWiki\MainConfigNames;
12use MediaWiki\MediaWikiServices;
13use MediaWiki\Revision\SlotRecord;
14
/**
 * Fetches the wikitext of the configuration page that belongs to a source wiki.
 *
 * The page is requested from the wiki given as $configServer via the action API. Its title is
 * the configured base page name followed by the source host without its top-level domain, e.g.
 * "Extension:FileImporter/Data/en.wikipedia".
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigRetriever {

    private Config $mainConfig;
    private HttpRequestExecutor $httpRequestExecutor;
    private string $configServer;
    private string $configBasePageName;

    /** Wikitext of the config page, null until retrieveConfiguration() succeeded */
    private ?string $configWikitext = null;
    /** Full URL of the config page, null until retrieveConfiguration() succeeded */
    private ?string $configWikiUrl = null;

    /**
     * @param HttpRequestExecutor $httpRequestExecutor
     * @param string $configServer Full domain including schema, e.g. "https://www.mediawiki.org"
     * @param string $configBasePageName Base page name, e.g. "Extension:FileImporter/Data/"
     */
    public function __construct(
        HttpRequestExecutor $httpRequestExecutor,
        string $configServer,
        string $configBasePageName
    ) {
        // TODO: Inject?
        $this->mainConfig = MediaWikiServices::getInstance()->getMainConfig();

        $this->httpRequestExecutor = $httpRequestExecutor;
        $this->configServer = $configServer;
        $this->configBasePageName = $configBasePageName;
    }

    /**
     * Requests the config page for the given source and, on success, stores its wikitext and URL
     * so they can be read via getConfigWikitext() and getConfigWikiUrl().
     *
     * @param SourceUrl $sourceUrl
     *
     * @return bool True if a config was found. False if the response does not describe exactly
     *  one page, or if that page is flagged as missing.
     * @throws ImportException when the request failed, or when the page exists but the response
     *  does not contain the content of its main slot
     */
    public function retrieveConfiguration( SourceUrl $sourceUrl ): bool {
        $response = $this->sendApiRequest( $sourceUrl );

        $pages = $response['query']['pages'] ?? [];
        if ( !is_array( $pages ) || count( $pages ) !== 1 ) {
            return false;
        }

        $currPage = end( $pages );
        if ( is_array( $currPage ) && array_key_exists( 'missing', $currPage ) ) {
            return false;
        }

        $revisions = is_array( $currPage ) ? ( $currPage['revisions'] ?? null ) : null;
        // end() returns false for an empty list, which must not reach the array accesses below
        $latestRevision = is_array( $revisions ) ? end( $revisions ) : false;
        $content = is_array( $latestRevision )
            ? ( $latestRevision['slots'][SlotRecord::MAIN]['content'] ?? null )
            : null;

        // Anything but an actual string means the response is unusable, never report that as success
        if ( !is_string( $content ) ) {
            throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
        }

        $this->configWikiUrl = $this->buildCommonsHelperConfigUrl( $sourceUrl );
        $this->configWikitext = $content;
        return true;
    }

    /**
     * @return string|null Null as long as retrieveConfiguration() did not succeed
     */
    public function getConfigWikitext() {
        return $this->configWikitext;
    }

    /**
     * @return string|null Null as long as retrieveConfiguration() did not succeed
     */
    public function getConfigWikiUrl() {
        return $this->configWikiUrl;
    }

    /**
     * @param SourceUrl $sourceUrl
     *
     * @return string Config server, followed by the local article path with the config page
     *  title filled in
     */
    private function buildCommonsHelperConfigUrl( SourceUrl $sourceUrl ): string {
        $title = $this->getQueryParamTitle( $sourceUrl );

        // We assume the wiki holding the config pages uses the same configuration.
        $articlePath = str_replace( '$1', $title, $this->mainConfig->get( MainConfigNames::ArticlePath ) );

        return $this->configServer . $articlePath;
    }

    /**
     * Queries the latest revision of the config page, including the content of its main slot.
     *
     * @param SourceUrl $sourceUrl
     *
     * @return array[] Decoded JSON response
     * @throws ImportException when the request failed or the response is not a JSON object/array
     */
    private function sendApiRequest( SourceUrl $sourceUrl ): array {
        // We assume the wiki holding the config pages uses the same configuration.
        $scriptPath = $this->mainConfig->get( MainConfigNames::ScriptPath );
        $apiUrl = $this->configServer . $scriptPath . '/api.php';
        $apiParameters = [
            'action' => 'query',
            'errorformat' => 'plaintext',
            'format' => 'json',
            'titles' => $this->getQueryParamTitle( $sourceUrl ),
            'prop' => 'revisions',
            'formatversion' => 2,
            'rvprop' => 'content',
            'rvlimit' => 1,
            'rvslots' => SlotRecord::MAIN,
            'rvdir' => 'older'
        ];

        try {
            $request = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
        } catch ( HttpRequestException $e ) {
            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
                $apiUrl ], $e );
        }

        // json_decode() returns null for invalid JSON and scalars for scalar documents. Both
        // would violate the declared return type, so report them as a failed request instead.
        $response = json_decode( $request->getContent(), true );
        if ( !is_array( $response ) ) {
            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo', $apiUrl ] );
        }

        return $response;
    }

    /**
     * @param SourceUrl $sourceUrl
     *
     * @return string Title of the config page, with spaces in the base page name replaced by
     *  underscores
     */
    private function getQueryParamTitle( SourceUrl $sourceUrl ): string {
        $domain = $this->getHostWithoutTopLevelDomain( $sourceUrl );

        if ( ctype_alpha( $domain ) ) {
            // Default to "www.mediawiki", even when the source URL was "https://mediawiki.org/…"
            $domain = 'www.' . $domain;
        }

        return str_replace( ' ', '_', $this->configBasePageName ) . $domain;
    }

    /**
     * @param SourceUrl $sourceUrl
     *
     * @return string Full host with all subdomains, but without the top-level domain (if a
     *  top-level domain was given), e.g. "en.wikipedia".
     */
    private function getHostWithoutTopLevelDomain( SourceUrl $sourceUrl ): string {
        $domain = $sourceUrl->getHost();

        // Reuse the original configuration pages for test imports from the Beta cluster
        $domain = str_replace( '.beta.wmflabs.org', '.org', $domain );

        // preg_replace() returns null on a PCRE error, keep the unmodified host in that case
        return preg_replace( '/\.\w+$/', '', $domain ) ?? $domain;
    }

}