Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
91.49% covered (success)
91.49%
43 / 47
75.00% covered (warning)
75.00%
6 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
CommonsHelperConfigRetriever
91.49% covered (success)
91.49%
43 / 47
75.00% covered (warning)
75.00%
6 / 8
16.16
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 retrieveConfiguration
93.33% covered (success)
93.33%
14 / 15
0.00% covered (danger)
0.00%
0 / 1
7.01
 getConfigWikitext
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getConfigWikiUrl
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 buildCommonsHelperConfigUrl
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 sendApiRequest
84.21% covered (warning)
84.21%
16 / 19
0.00% covered (danger)
0.00%
0 / 1
2.02
 getQueryParamTitle
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 getHostWithoutTopLevelDomain
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use FileImporter\Data\SourceUrl;
6use FileImporter\Exceptions\HttpRequestException;
7use FileImporter\Exceptions\ImportException;
8use FileImporter\Exceptions\LocalizedImportException;
9use FileImporter\Services\Http\HttpRequestExecutor;
10use MediaWiki\Config\Config;
11use MediaWiki\MainConfigNames;
12use MediaWiki\MediaWikiServices;
13use MediaWiki\Revision\SlotRecord;
14
15/**
16 * @license GPL-2.0-or-later
17 */
class CommonsHelperConfigRetriever {

    /** Main configuration of the local wiki, used to derive the article and script paths */
    private Config $mainConfig;

    /** @var string|null Guaranteed to be set when {@link retrieveConfiguration} returned true */
    private ?string $configWikitext = null;
    /** @var string|null Guaranteed to be set when {@link retrieveConfiguration} returned true */
    private ?string $configWikiUrl = null;

    /**
     * @param HttpRequestExecutor $httpRequestExecutor
     * @param string $configServer Full domain including schema, e.g. "https://www.mediawiki.org"
     * @param string $configBasePageName Base page name, e.g. "Extension:FileImporter/Data/"
     */
    public function __construct(
        private readonly HttpRequestExecutor $httpRequestExecutor,
        private readonly string $configServer,
        private readonly string $configBasePageName,
    ) {
        // TODO: Inject?
        $this->mainConfig = MediaWikiServices::getInstance()->getMainConfig();
    }

    /**
     * Requests the configuration page that belongs to the given source URL and, on success,
     * stores its wikitext and URL for {@link getConfigWikitext} and {@link getConfigWikiUrl}.
     *
     * @param SourceUrl $sourceUrl
     * @return bool True if a config was found. False if the response does not contain exactly
     *  one page, or if that page is flagged as missing.
     * @throws ImportException when the request failed, or when the page exists but the response
     *  does not contain the text of its main slot
     */
    public function retrieveConfiguration( SourceUrl $sourceUrl ): bool {
        $response = $this->sendApiRequest( $sourceUrl );

        $pages = $response['query']['pages'] ?? [];
        if ( !is_array( $pages ) || count( $pages ) !== 1 ) {
            return false;
        }

        $currPage = array_last( $pages );
        if ( !is_array( $currPage ) ) {
            // A page entry that isn't a structure can't be inspected any further
            throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
        }

        if ( ( $currPage['missing'] ?? false ) !== false ) {
            return false;
        }

        // An empty revision list makes array_last() return null. The null-coalescing lookup
        // below tolerates that, as well as any missing or malformed intermediate level.
        $revisions = $currPage['revisions'] ?? null;
        $latestRevision = is_array( $revisions ) ? array_last( $revisions ) : null;
        $content = $latestRevision['slots'][SlotRecord::MAIN]['content'] ?? null;

        // Only report success when there is actual text to hand out, so the guarantee
        // documented on the two properties holds.
        if ( is_string( $content ) ) {
            $this->configWikiUrl = $this->buildCommonsHelperConfigUrl( $sourceUrl );
            $this->configWikitext = $content;
            return true;
        }

        throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
    }

    /**
     * @return string Wikitext of the configuration page. Only meaningful after
     *  {@link retrieveConfiguration} returned true; null before that.
     */
    public function getConfigWikitext() {
        return $this->configWikitext;
    }

    /**
     * @return string URL of the configuration page. Only meaningful after
     *  {@link retrieveConfiguration} returned true; null before that.
     */
    public function getConfigWikiUrl() {
        return $this->configWikiUrl;
    }

    /**
     * Builds the URL of the configuration page by substituting the page title into the local
     * article path and prefixing the result with the configured server.
     *
     * @param SourceUrl $sourceUrl
     * @return string
     */
    private function buildCommonsHelperConfigUrl( SourceUrl $sourceUrl ): string {
        $title = $this->getQueryParamTitle( $sourceUrl );

        // We assume the wiki holding the config pages uses the same configuration.
        $articlePath = str_replace( '$1', $title, $this->mainConfig->get( MainConfigNames::ArticlePath ) );

        return $this->configServer . $articlePath;
    }

    /**
     * Queries api.php on the configured server for the main slot content of a single revision
     * of the configuration page.
     *
     * @param SourceUrl $sourceUrl
     * @return array[] Decoded JSON response
     * @throws ImportException when the request failed or the response body does not decode to
     *  an array
     */
    private function sendApiRequest( SourceUrl $sourceUrl ): array {
        // We assume the wiki holding the config pages uses the same configuration.
        $scriptPath = $this->mainConfig->get( MainConfigNames::ScriptPath );
        $apiUrl = $this->configServer . $scriptPath . '/api.php';
        $apiParameters = [
            'action' => 'query',
            'errorformat' => 'plaintext',
            'format' => 'json',
            'titles' => $this->getQueryParamTitle( $sourceUrl ),
            'prop' => 'revisions',
            'formatversion' => 2,
            'rvprop' => 'content',
            'rvlimit' => 1,
            'rvslots' => SlotRecord::MAIN,
            'rvdir' => 'older'
        ];

        try {
            $request = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
        } catch ( HttpRequestException $e ) {
            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
                $apiUrl ], $e );
        }

        // json_decode() yields null for a body that isn't valid JSON, and a scalar for a bare
        // JSON value. Both would violate the array return type, so report them the same way
        // as a failed request.
        $decoded = json_decode( $request->getContent(), true );
        if ( !is_array( $decoded ) ) {
            throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
                $apiUrl ] );
        }

        return $decoded;
    }

    /**
     * @param SourceUrl $sourceUrl
     * @return string Title of the configuration page: the base page name with spaces turned
     *  into underscores, followed by the source host without its top-level domain
     */
    private function getQueryParamTitle( SourceUrl $sourceUrl ): string {
        $domain = $this->getHostWithoutTopLevelDomain( $sourceUrl );

        if ( ctype_alpha( $domain ) ) {
            // Default to "www.mediawiki", even when the source URL was "https://mediawiki.org/…"
            $domain = 'www.' . $domain;
        }

        return str_replace( ' ', '_', $this->configBasePageName ) . $domain;
    }

    /**
     * @param SourceUrl $sourceUrl
     * @return string Full host with all subdomains, but without the top-level domain (if a
     *  top-level domain was given), e.g. "en.wikipedia".
     */
    private function getHostWithoutTopLevelDomain( SourceUrl $sourceUrl ): string {
        $domain = $sourceUrl->getHost();

        // Reuse the original configuration pages for test imports from the Beta cluster
        $domain = str_replace( '.beta.wmflabs.org', '.org', $domain );

        // Strip the last dot-separated label
        return preg_replace( '/\.\w+$/', '', $domain );
    }

}