Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
92.00% |
46 / 50 |
|
75.00% |
6 / 8 |
CRAP | |
0.00% |
0 / 1 |
CommonsHelperConfigRetriever | |
92.00% |
46 / 50 |
|
75.00% |
6 / 8 |
16.13 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
retrieveConfiguration | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
7.01 | |||
getConfigWikitext | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getConfigWikiUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
buildCommonsHelperConfigUrl | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
sendApiRequest | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
2.02 | |||
getQueryParamTitle | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getHostWithoutTopLevelDomain | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace FileImporter\Remote\MediaWiki; |
4 | |
5 | use FileImporter\Data\SourceUrl; |
6 | use FileImporter\Exceptions\HttpRequestException; |
7 | use FileImporter\Exceptions\ImportException; |
8 | use FileImporter\Exceptions\LocalizedImportException; |
9 | use FileImporter\Services\Http\HttpRequestExecutor; |
10 | use MediaWiki\Config\Config; |
11 | use MediaWiki\MainConfigNames; |
12 | use MediaWiki\MediaWikiServices; |
13 | use MediaWiki\Revision\SlotRecord; |
14 | |
/**
 * Fetches the per-source-wiki configuration page (e.g.
 * "Extension:FileImporter/Data/en.wikipedia") from the wiki that hosts these pages and
 * exposes its wikitext together with a URL pointing to the page.
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigRetriever {

	/** @var Config Local main config; used for the article and script path, see below */
	private $mainConfig;
	/** @var HttpRequestExecutor */
	private $httpRequestExecutor;
	/** @var string Full domain including schema of the wiki holding the config pages */
	private $configServer;
	/** @var string Page name prefix the source wiki's host is appended to */
	private $configBasePageName;
	/** @var string|null Wikitext of the config page, null until successfully retrieved */
	private $configWikitext = null;
	/** @var string|null URL of the config page, null until successfully retrieved */
	private $configWikiUrl = null;

	/**
	 * @param HttpRequestExecutor $httpRequestExecutor
	 * @param string $configServer Full domain including schema, e.g. "https://www.mediawiki.org"
	 * @param string $configBasePageName Base page name, e.g. "Extension:FileImporter/Data/"
	 * @param Config|null $mainConfig Optional main config to use. Falls back to the global
	 *  main config when omitted, which keeps existing three-argument callers working.
	 */
	public function __construct(
		HttpRequestExecutor $httpRequestExecutor,
		string $configServer,
		string $configBasePageName,
		?Config $mainConfig = null
	) {
		$this->mainConfig = $mainConfig ?? MediaWikiServices::getInstance()->getMainConfig();

		$this->httpRequestExecutor = $httpRequestExecutor;
		$this->configServer = $configServer;
		$this->configBasePageName = $configBasePageName;
	}

	/**
	 * Requests the config page that belongs to the given source URL and, on success, stores
	 * its wikitext and URL for {@see getConfigWikitext} and {@see getConfigWikiUrl}.
	 *
	 * @param SourceUrl $sourceUrl
	 * @return bool True if a config was found. False if the response doesn't contain exactly
	 *  one page, or if that page is flagged as missing.
	 * @throws ImportException when the request failed, or when the page exists but its main
	 *  slot content can't be found in the response
	 */
	public function retrieveConfiguration( SourceUrl $sourceUrl ): bool {
		$response = $this->sendApiRequest( $sourceUrl );

		$pages = $response['query']['pages'] ?? [];
		if ( !is_array( $pages ) || count( $pages ) !== 1 ) {
			return false;
		}

		$currPage = end( $pages );
		if ( !is_array( $currPage ) ) {
			// Malformed page entry; report it the same way as any other unusable response
			throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
		}

		if ( array_key_exists( 'missing', $currPage ) ) {
			return false;
		}

		// end() returns false for an empty list. The null coalescing below tolerates that, as
		// well as any missing intermediate key, without raising an error.
		$revisions = $currPage['revisions'] ?? [];
		$latestRevision = is_array( $revisions ) ? end( $revisions ) : null;
		$content = $latestRevision['slots'][SlotRecord::MAIN]['content'] ?? null;

		if ( is_string( $content ) ) {
			$this->configWikiUrl = $this->buildCommonsHelperConfigUrl( $sourceUrl );
			$this->configWikitext = $content;
			return true;
		}

		throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
	}

	/**
	 * @return string|null Null as long as no config was successfully retrieved
	 */
	public function getConfigWikitext() {
		return $this->configWikitext;
	}

	/**
	 * @return string|null Null as long as no config was successfully retrieved
	 */
	public function getConfigWikiUrl() {
		return $this->configWikiUrl;
	}

	/**
	 * @param SourceUrl $sourceUrl
	 * @return string Config server, followed by the article path with "$1" replaced by the
	 *  config page's title
	 */
	private function buildCommonsHelperConfigUrl( SourceUrl $sourceUrl ): string {
		$title = $this->getQueryParamTitle( $sourceUrl );

		// We assume the wiki holding the config pages uses the same configuration.
		$articlePath = str_replace( '$1', $title, $this->mainConfig->get( MainConfigNames::ArticlePath ) );

		return $this->configServer . $articlePath;
	}

	/**
	 * Queries the config server's api.php for the main slot content of a single revision
	 * of the config page.
	 *
	 * @param SourceUrl $sourceUrl
	 * @return array[] Decoded JSON response
	 * @throws ImportException when the request failed or the response is not a JSON
	 *  array/object
	 */
	private function sendApiRequest( SourceUrl $sourceUrl ): array {
		// We assume the wiki holding the config pages uses the same configuration.
		$scriptPath = $this->mainConfig->get( MainConfigNames::ScriptPath );
		$apiUrl = $this->configServer . $scriptPath . '/api.php';
		$apiParameters = [
			'action' => 'query',
			'errorformat' => 'plaintext',
			'format' => 'json',
			'titles' => $this->getQueryParamTitle( $sourceUrl ),
			'prop' => 'revisions',
			'formatversion' => 2,
			'rvprop' => 'content',
			'rvlimit' => 1,
			'rvslots' => SlotRecord::MAIN,
			'rvdir' => 'older'
		];

		try {
			$request = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
		} catch ( HttpRequestException $e ) {
			throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
				$apiUrl ], $e );
		}

		// json_decode() yields null on invalid JSON and may yield a scalar for valid but
		// unexpected input. Both would violate the declared array return type.
		$response = json_decode( $request->getContent(), true );
		if ( !is_array( $response ) ) {
			throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
				$apiUrl ] );
		}

		return $response;
	}

	/**
	 * @param SourceUrl $sourceUrl
	 * @return string Base page name (spaces replaced with underscores) followed by the
	 *  source host without its top-level domain, e.g. "Extension:FileImporter/Data/en.wikipedia"
	 */
	private function getQueryParamTitle( SourceUrl $sourceUrl ): string {
		$domain = $this->getHostWithoutTopLevelDomain( $sourceUrl );

		// Purely alphabetic means there is no subdomain left
		if ( ctype_alpha( $domain ) ) {
			// Default to "www.mediawiki", even when the source URL was "https://mediawiki.org/…"
			$domain = 'www.' . $domain;
		}

		return str_replace( ' ', '_', $this->configBasePageName ) . $domain;
	}

	/**
	 * @param SourceUrl $sourceUrl
	 * @return string Full host with all subdomains, but without the top-level domain (if a
	 *  top-level domain was given), e.g. "en.wikipedia".
	 */
	private function getHostWithoutTopLevelDomain( SourceUrl $sourceUrl ): string {
		$domain = $sourceUrl->getHost();

		// Reuse the original configuration pages for test imports from the Beta cluster
		$domain = str_replace( '.beta.wmflabs.org', '.org', $domain );

		// Strip the last dot-separated segment
		return preg_replace( '/\.\w+$/', '', $domain );
	}

}