Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
92.00% |
46 / 50 |
|
75.00% |
6 / 8 |
CRAP | |
0.00% |
0 / 1 |
CommonsHelperConfigRetriever | |
92.00% |
46 / 50 |
|
75.00% |
6 / 8 |
16.13 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
retrieveConfiguration | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
7.01 | |||
getConfigWikitext | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getConfigWikiUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
buildCommonsHelperConfigUrl | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
sendApiRequest | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
2.02 | |||
getQueryParamTitle | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getHostWithoutTopLevelDomain | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace FileImporter\Remote\MediaWiki; |
4 | |
5 | use FileImporter\Data\SourceUrl; |
6 | use FileImporter\Exceptions\HttpRequestException; |
7 | use FileImporter\Exceptions\ImportException; |
8 | use FileImporter\Exceptions\LocalizedImportException; |
9 | use FileImporter\Services\Http\HttpRequestExecutor; |
10 | use MediaWiki\Config\Config; |
11 | use MediaWiki\MainConfigNames; |
12 | use MediaWiki\MediaWikiServices; |
13 | use MediaWiki\Revision\SlotRecord; |
14 | |
15 | /** |
16 | * @license GPL-2.0-or-later |
17 | */ |
/**
 * Fetches the CommonsHelper configuration page that belongs to a source wiki from the wiki
 * that hosts these configuration pages, using the MediaWiki action API.
 *
 * After a successful call to retrieveConfiguration() the wikitext and the URL of the
 * configuration page are available via getConfigWikitext() and getConfigWikiUrl().
 */
class CommonsHelperConfigRetriever {

	private Config $mainConfig;
	private HttpRequestExecutor $httpRequestExecutor;
	private string $configServer;
	private string $configBasePageName;

	/** Wikitext of the configuration page, null as long as none was retrieved */
	private ?string $configWikitext = null;
	/** URL of the configuration page, null as long as none was retrieved */
	private ?string $configWikiUrl = null;

	/**
	 * @param HttpRequestExecutor $httpRequestExecutor
	 * @param string $configServer Full domain including schema, e.g. "https://www.mediawiki.org"
	 * @param string $configBasePageName Base page name, e.g. "Extension:FileImporter/Data/"
	 */
	public function __construct(
		HttpRequestExecutor $httpRequestExecutor,
		string $configServer,
		string $configBasePageName
	) {
		// TODO: Inject?
		$this->mainConfig = MediaWikiServices::getInstance()->getMainConfig();

		$this->httpRequestExecutor = $httpRequestExecutor;
		$this->configServer = $configServer;
		$this->configBasePageName = $configBasePageName;
	}

	/**
	 * Requests the configuration page for the given source URL and, on success, stores its
	 * wikitext and URL in this object.
	 *
	 * @param SourceUrl $sourceUrl
	 * @return bool True if a config was found, false if the response does not contain exactly
	 *  one page or the page is reported as missing
	 * @throws ImportException when the request failed, or when the page exists but the
	 *  response does not contain the main slot content of a revision
	 */
	public function retrieveConfiguration( SourceUrl $sourceUrl ): bool {
		$response = $this->sendApiRequest( $sourceUrl );

		$pages = $response['query']['pages'] ?? [];
		if ( !is_array( $pages ) || count( $pages ) !== 1 ) {
			return false;
		}

		$currPage = end( $pages );
		if ( !is_array( $currPage ) ) {
			throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
		}

		if ( array_key_exists( 'missing', $currPage ) ) {
			return false;
		}

		// end() returns false for an empty list, so every step is checked before it is
		// dereferenced. The null coalescing lookup tolerates absent or malformed levels.
		$revisions = $currPage['revisions'] ?? null;
		$latestRevision = is_array( $revisions ) ? end( $revisions ) : null;
		$content = is_array( $latestRevision )
			? ( $latestRevision['slots'][SlotRecord::MAIN]['content'] ?? null )
			: null;

		if ( !is_string( $content ) ) {
			throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
		}

		$this->configWikiUrl = $this->buildCommonsHelperConfigUrl( $sourceUrl );
		$this->configWikitext = $content;
		return true;
	}

	/**
	 * @return string|null Wikitext stored by the last successful retrieveConfiguration()
	 *  call, null if there was none
	 */
	public function getConfigWikitext() {
		return $this->configWikitext;
	}

	/**
	 * @return string|null URL of the configuration page stored by the last successful
	 *  retrieveConfiguration() call, null if there was none
	 */
	public function getConfigWikiUrl() {
		return $this->configWikiUrl;
	}

	/**
	 * Builds the URL of the configuration page from the config server and the local
	 * article path.
	 *
	 * @param SourceUrl $sourceUrl
	 * @return string
	 */
	private function buildCommonsHelperConfigUrl( SourceUrl $sourceUrl ): string {
		$title = $this->getQueryParamTitle( $sourceUrl );

		// We assume the wiki holding the config pages uses the same configuration.
		$articlePath = str_replace( '$1', $title, $this->mainConfig->get( MainConfigNames::ArticlePath ) );

		return $this->configServer . $articlePath;
	}

	/**
	 * Queries the config server's api.php for the main slot content of a single revision of
	 * the configuration page.
	 *
	 * @param SourceUrl $sourceUrl
	 * @return array[] Decoded JSON response
	 * @throws ImportException when the request failed or the response is not a JSON
	 *  object/array
	 */
	private function sendApiRequest( SourceUrl $sourceUrl ): array {
		// We assume the wiki holding the config pages uses the same configuration.
		$scriptPath = $this->mainConfig->get( MainConfigNames::ScriptPath );
		$apiUrl = $this->configServer . $scriptPath . '/api.php';
		$apiParameters = [
			'action' => 'query',
			'errorformat' => 'plaintext',
			'format' => 'json',
			'titles' => $this->getQueryParamTitle( $sourceUrl ),
			'prop' => 'revisions',
			'formatversion' => 2,
			'rvprop' => 'content',
			'rvlimit' => 1,
			'rvslots' => SlotRecord::MAIN,
			'rvdir' => 'older'
		];

		try {
			$apiResponse = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
		} catch ( HttpRequestException $e ) {
			throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
				$apiUrl ], $e );
		}

		// json_decode() yields null for invalid JSON and a scalar for scalar JSON. Both would
		// violate the declared return type, so report them as a failed request instead.
		$data = json_decode( $apiResponse->getContent(), true );
		if ( !is_array( $data ) ) {
			throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
				$apiUrl ] );
		}

		return $data;
	}

	/**
	 * @param SourceUrl $sourceUrl
	 * @return string Title of the configuration page: the base page name with spaces
	 *  replaced by underscores, followed by the source host without top-level domain
	 */
	private function getQueryParamTitle( SourceUrl $sourceUrl ): string {
		$domain = $this->getHostWithoutTopLevelDomain( $sourceUrl );

		if ( ctype_alpha( $domain ) ) {
			// Default to "www.mediawiki", even when the source URL was "https://mediawiki.org/…"
			$domain = 'www.' . $domain;
		}

		return str_replace( ' ', '_', $this->configBasePageName ) . $domain;
	}

	/**
	 * @param SourceUrl $sourceUrl
	 * @return string Full host with all subdomains, but without the top-level domain (if a
	 *  top-level domain was given), e.g. "en.wikipedia".
	 */
	private function getHostWithoutTopLevelDomain( SourceUrl $sourceUrl ): string {
		$domain = $sourceUrl->getHost();

		// Reuse the original configuration pages for test imports from the Beta cluster
		$domain = str_replace( '.beta.wmflabs.org', '.org', $domain );

		// preg_replace() returns null on a PCRE error; fall back to the unmodified host
		return preg_replace( '/\.\w+$/', '', $domain ) ?? $domain;
	}

}