Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
91.49% |
43 / 47 |
|
75.00% |
6 / 8 |
CRAP | |
0.00% |
0 / 1 |
| CommonsHelperConfigRetriever | |
91.49% |
43 / 47 |
|
75.00% |
6 / 8 |
16.16 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| retrieveConfiguration | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
7.01 | |||
| getConfigWikitext | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getConfigWikiUrl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| buildCommonsHelperConfigUrl | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| sendApiRequest | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
2.02 | |||
| getQueryParamTitle | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getHostWithoutTopLevelDomain | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace FileImporter\Remote\MediaWiki; |
| 4 | |
| 5 | use FileImporter\Data\SourceUrl; |
| 6 | use FileImporter\Exceptions\HttpRequestException; |
| 7 | use FileImporter\Exceptions\ImportException; |
| 8 | use FileImporter\Exceptions\LocalizedImportException; |
| 9 | use FileImporter\Services\Http\HttpRequestExecutor; |
| 10 | use MediaWiki\Config\Config; |
| 11 | use MediaWiki\MainConfigNames; |
| 12 | use MediaWiki\MediaWikiServices; |
| 13 | use MediaWiki\Revision\SlotRecord; |
| 14 | |
/**
 * Retrieves the wikitext of a CommonsHelper configuration page from a remote wiki.
 *
 * The page title is built from the configured base page name and the host of the source URL
 * (without its top-level domain). The page is fetched via the action API of the configured
 * server.
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigRetriever {

	private Config $mainConfig;

	/** @var string|null Guaranteed to be set when {@link retrieveConfiguration} returned true */
	private ?string $configWikitext = null;
	/** @var string|null Guaranteed to be set when {@link retrieveConfiguration} returned true */
	private ?string $configWikiUrl = null;

	/**
	 * @param HttpRequestExecutor $httpRequestExecutor
	 * @param string $configServer Full domain including schema, e.g. "https://www.mediawiki.org"
	 * @param string $configBasePageName Base page name, e.g. "Extension:FileImporter/Data/"
	 */
	public function __construct(
		private readonly HttpRequestExecutor $httpRequestExecutor,
		private readonly string $configServer,
		private readonly string $configBasePageName,
	) {
		// TODO: Inject?
		$this->mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	}

	/**
	 * Requests the configuration page that belongs to the given source URL and, on success,
	 * stores its wikitext and URL for {@link getConfigWikitext} and {@link getConfigWikiUrl}.
	 *
	 * @param SourceUrl $sourceUrl
	 * @return bool True if a config was found. False if the response does not describe exactly
	 *  one page, or if that page is flagged as missing.
	 * @throws ImportException e.g. when the request failed or the page carries no usable content
	 */
	public function retrieveConfiguration( SourceUrl $sourceUrl ): bool {
		$response = $this->sendApiRequest( $sourceUrl );

		$pages = $response['query']['pages'] ?? [];
		// Anything that is not a list of exactly one page is treated as "no config found"
		if ( !is_array( $pages ) || count( $pages ) !== 1 ) {
			return false;
		}

		$currPage = array_last( $pages );
		if ( !is_array( $currPage ) ) {
			throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
		}

		if ( ( $currPage['missing'] ?? false ) !== false ) {
			return false;
		}

		// The null-coalescing chain tolerates an absent or empty revision list as well as
		// missing "slots"/"content" keys, all of which end up in the exception below.
		$revisions = $currPage['revisions'] ?? null;
		$latestRevision = is_array( $revisions ) ? array_last( $revisions ) : null;
		$content = $latestRevision['slots'][SlotRecord::MAIN]['content'] ?? null;

		if ( !is_string( $content ) ) {
			throw new LocalizedImportException( 'fileimporter-commonshelper-retrieval-failed' );
		}

		$this->configWikiUrl = $this->buildCommonsHelperConfigUrl( $sourceUrl );
		$this->configWikitext = $content;
		return true;
	}

	/**
	 * @return string|null Wikitext of the config page, null as long as
	 *  {@link retrieveConfiguration} did not return true
	 */
	public function getConfigWikitext() {
		return $this->configWikitext;
	}

	/**
	 * @return string|null URL of the config page, null as long as
	 *  {@link retrieveConfiguration} did not return true
	 */
	public function getConfigWikiUrl() {
		return $this->configWikiUrl;
	}

	/**
	 * Builds the URL of the config page on the config server.
	 */
	private function buildCommonsHelperConfigUrl( SourceUrl $sourceUrl ): string {
		$title = $this->getQueryParamTitle( $sourceUrl );

		// We assume the wiki holding the config pages uses the same configuration.
		$articlePath = str_replace( '$1', $title, $this->mainConfig->get( MainConfigNames::ArticlePath ) );

		return $this->configServer . $articlePath;
	}

	/**
	 * Queries the config server's action API for the latest revision of the config page.
	 *
	 * @return array[] Decoded JSON response
	 * @throws ImportException when the request failed or the response is not a JSON array/object
	 */
	private function sendApiRequest( SourceUrl $sourceUrl ): array {
		// We assume the wiki holding the config pages uses the same configuration.
		$scriptPath = $this->mainConfig->get( MainConfigNames::ScriptPath );
		$apiUrl = $this->configServer . $scriptPath . '/api.php';
		$apiParameters = [
			'action' => 'query',
			'errorformat' => 'plaintext',
			'format' => 'json',
			'titles' => $this->getQueryParamTitle( $sourceUrl ),
			'prop' => 'revisions',
			'formatversion' => 2,
			'rvprop' => 'content',
			'rvlimit' => 1,
			'rvslots' => SlotRecord::MAIN,
			'rvdir' => 'older'
		];

		try {
			$request = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
		} catch ( HttpRequestException $e ) {
			throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
				$apiUrl ], $e );
		}

		$result = json_decode( $request->getContent(), true );
		// json_decode() yields null on malformed input and scalars for scalar JSON; neither
		// satisfies the declared array return type, so report it as a failed request instead.
		if ( !is_array( $result ) ) {
			throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
				$apiUrl ] );
		}

		return $result;
	}

	/**
	 * @return string Title of the config page: the base page name (spaces replaced with
	 *  underscores) followed by the source host without its top-level domain
	 */
	private function getQueryParamTitle( SourceUrl $sourceUrl ): string {
		$domain = $this->getHostWithoutTopLevelDomain( $sourceUrl );

		if ( ctype_alpha( $domain ) ) {
			// Default to "www.mediawiki", even when the source URL was "https://mediawiki.org/…"
			$domain = 'www.' . $domain;
		}

		return str_replace( ' ', '_', $this->configBasePageName ) . $domain;
	}

	/**
	 * @return string Full host with all subdomains, but without the top-level domain (if a
	 *  top-level domain was given), e.g. "en.wikipedia".
	 */
	private function getHostWithoutTopLevelDomain( SourceUrl $sourceUrl ): string {
		$domain = $sourceUrl->getHost();

		// Reuse the original configuration pages for test imports from the Beta cluster
		$domain = str_replace( '.beta.wmflabs.org', '.org', $domain );

		// preg_replace() returns null on a PCRE error; fall back to the unmodified host then
		return preg_replace( '/\.\w+$/', '', $domain ) ?? $domain;
	}

}