Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
82.35% |
42 / 51 |
|
33.33% |
1 / 3 |
CRAP | |
0.00% |
0 / 1 |
| HttpApiLookup | |
82.35% |
42 / 51 |
|
33.33% |
1 / 3 |
13.93 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setLogger | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
| getApiUrl | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
3.10 | |||
| actuallyGetApiUrl | |
82.93% |
34 / 41 |
|
0.00% |
0 / 1 |
8.32 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace FileImporter\Remote\MediaWiki; |
| 4 | |
| 5 | use DOMDocument; |
| 6 | use DOMElement; |
| 7 | use FileImporter\Data\SourceUrl; |
| 8 | use FileImporter\Exceptions\HttpRequestException; |
| 9 | use FileImporter\Exceptions\ImportException; |
| 10 | use FileImporter\Exceptions\LocalizedImportException; |
| 11 | use FileImporter\Services\Http\HttpRequestExecutor; |
| 12 | use MediaWiki\MediaWikiServices; |
| 13 | use MediaWiki\Message\Message; |
| 14 | use Psr\Log\LoggerAwareInterface; |
| 15 | use Psr\Log\LoggerInterface; |
| 16 | use Psr\Log\NullLogger; |
| 17 | |
/**
 * Lookup that can take a MediaWiki site URL and return the URL of the action API.
 * This service caches APIs that have been found for the lifetime of the object.
 *
 * Discovery works by requesting the given page and reading the `href` of the first
 * `<link rel="EditURI">` element found in the returned HTML.
 *
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class HttpApiLookup implements LoggerAwareInterface {

	private LoggerInterface $logger;

	/** @var string[] url => apiUrl */
	private array $resultCache = [];

	/**
	 * @param HttpRequestExecutor $httpRequestExecutor Used to fetch the page the API URL is
	 *  discovered from.
	 */
	public function __construct(
		private readonly HttpRequestExecutor $httpRequestExecutor,
	) {
		// Logging is a no-op until a real logger is injected via setLogger().
		$this->logger = new NullLogger();
	}

	/**
	 * @codeCoverageIgnore
	 */
	public function setLogger( LoggerInterface $logger ): void {
		$this->logger = $logger;
	}

	/**
	 * @param SourceUrl $sourceUrl A URL that points to any editable HTML page in any MediaWiki
	 *  wiki. The page is expected to contain a <link rel="EditURI" href="…"> element.
	 *
	 * @return string URL of api.php
	 * @throws ImportException when the request failed or no API URL could be discovered
	 */
	public function getApiUrl( SourceUrl $sourceUrl ) {
		$pageUrl = $sourceUrl->getUrl();

		if ( array_key_exists( $pageUrl, $this->resultCache ) ) {
			return $this->resultCache[$pageUrl];
		}

		$api = $this->actuallyGetApiUrl( $pageUrl );
		if ( $api ) {
			// Only successful lookups are cached; failures are retried on the next call.
			$this->resultCache[$pageUrl] = $api;
			return $api;
		}

		$this->logger->error( 'Failed to get MediaWiki API from SourceUrl.' );
		throw new LocalizedImportException( 'fileimporter-mediawiki-api-notfound' );
	}

	/**
	 * Requests the page and derives the API URL from its EditURI link.
	 *
	 * @param string $pageUrl
	 *
	 * @return string|null API URL expanded to HTTPS, or null when the response contains no
	 *  usable EditURI link (including when the response body is empty)
	 * @throws ImportException when the request failed
	 */
	private function actuallyGetApiUrl( string $pageUrl ): ?string {
		try {
			$req = $this->httpRequestExecutor->execute( $pageUrl );
		} catch ( HttpRequestException $ex ) {
			$statusCode = $ex->getHttpRequest()->getStatus();
			$error = $ex->getStatusValue()->getMessages()[0] ?? null;

			if ( $statusCode === 404 ) {
				$msg = [ 'fileimporter-api-file-notfound', Message::plaintextParam( $pageUrl ) ];
			} else {
				$msg = [
					'fileimporter-api-failedtofindapi',
					$pageUrl,
					// Note: If a parameter to a Message is another Message, it will be forced to
					// use the same language.
					$statusCode !== 200
						? wfMessage( 'fileimporter-http-statuscode', $statusCode )
						: '',
					$error
						? wfMessage( $error )
						: ''
				];
			}

			$this->logger->error( 'Failed to discover API location from: ' . $pageUrl, [
				'statusCode' => $statusCode,
				'previousMessage' => $error ? $error->getKey() : '',
				'responseContent' => $ex->getHttpRequest()->getContent(),
			] );
			throw new LocalizedImportException( $msg );
		}

		// Read the body once; it is needed for parsing and, on failure, for the log context.
		$content = $req->getContent();

		$editUri = $this->findEditUri( (string)$content );
		if ( $editUri !== null ) {
			$api = str_replace( '?action=rsd', '', $editUri );
			// Always prefer HTTPS because of (optional) edit/delete requests, see T228851
			$services = MediaWikiServices::getInstance();
			return $services->getUrlUtils()->expand( $api, PROTO_HTTPS );
		}

		$this->logger->error(
			'Failed to discover API location from: "' . $pageUrl . '".',
			[
				'responseContent' => $content,
			]
		);

		return null;
	}

	/**
	 * Extracts the href of the first <link rel="EditURI"> element from an HTML document.
	 *
	 * @param string $html Raw HTML of the requested page
	 *
	 * @return string|null The href attribute value (an empty string when the attribute is
	 *  missing), or null when $html is empty or contains no such element
	 */
	private function findEditUri( string $html ): ?string {
		// DOMDocument::loadHTML() throws a ValueError for an empty source on PHP 8, which
		// would escape the ImportException contract. An empty body cannot contain the link.
		if ( $html === '' ) {
			return null;
		}

		$document = new DOMDocument();

		// Real-world HTML routinely triggers libxml warnings; collect them internally
		// instead of emitting PHP warnings, then discard them.
		$oldXmlErrorUsage = libxml_use_internal_errors( true );
		try {
			$document->loadHTML( $html );
		} finally {
			// Restore the global libxml state even if loadHTML() throws.
			libxml_clear_errors();
			libxml_use_internal_errors( $oldXmlErrorUsage );
		}

		foreach ( $document->getElementsByTagName( 'link' ) as $element ) {
			/** @var DOMElement $element */
			if ( $element->getAttribute( 'rel' ) === 'EditURI' ) {
				return $element->getAttribute( 'href' );
			}
		}

		return null;
	}

}