Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
82.69% |
43 / 52 |
|
33.33% |
1 / 3 |
CRAP | |
0.00% |
0 / 1 |
HttpApiLookup | |
82.69% |
43 / 52 |
|
33.33% |
1 / 3 |
13.88 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
setLogger | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
getApiUrl | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
3.10 | |||
actuallyGetApiUrl | |
82.93% |
34 / 41 |
|
0.00% |
0 / 1 |
8.32 |
1 | <?php |
2 | |
3 | namespace FileImporter\Remote\MediaWiki; |
4 | |
5 | use DOMDocument; |
6 | use DOMElement; |
7 | use FileImporter\Data\SourceUrl; |
8 | use FileImporter\Exceptions\HttpRequestException; |
9 | use FileImporter\Exceptions\ImportException; |
10 | use FileImporter\Exceptions\LocalizedImportException; |
11 | use FileImporter\Services\Http\HttpRequestExecutor; |
12 | use Message; |
13 | use Psr\Log\LoggerAwareInterface; |
14 | use Psr\Log\LoggerInterface; |
15 | use Psr\Log\NullLogger; |
16 | |
17 | /** |
18 | * Lookup that can take a MediaWiki site URL and return the URL of the action API. |
19 | * This service caches APIs that have been found for the lifetime of the object. |
20 | * |
21 | * @license GPL-2.0-or-later |
22 | * @author Addshore |
23 | */ |
24 | class HttpApiLookup implements LoggerAwareInterface { |
25 | |
26 | private HttpRequestExecutor $httpRequestExecutor; |
27 | private LoggerInterface $logger; |
28 | |
29 | /** @var string[] url => apiUrl */ |
30 | private $resultCache = []; |
31 | |
32 | public function __construct( HttpRequestExecutor $httpRequestExecutor ) { |
33 | $this->httpRequestExecutor = $httpRequestExecutor; |
34 | $this->logger = new NullLogger(); |
35 | } |
36 | |
37 | /** |
38 | * Replaces the logger used for error reporting; a NullLogger is used until this is called. |
39 | * |
40 | * @codeCoverageIgnore |
41 | */ |
42 | public function setLogger( LoggerInterface $logger ) { |
43 | $this->logger = $logger; |
44 | } |
45 | |
46 | /** |
47 | * Returns the action API URL for the given page URL. Successful lookups are cached per page |
48 | * URL for the lifetime of this object. |
49 | * |
50 | * @param SourceUrl $sourceUrl A URL that points to any editable HTML page in any MediaWiki |
51 | * wiki. The page is expected to contain a <link rel="EditURI" href="…"> element. |
52 | * |
53 | * @return string URL of api.php |
54 | * @throws ImportException when the request failed |
55 | * @throws LocalizedImportException when no API URL could be found in the response |
56 | */ |
57 | public function getApiUrl( SourceUrl $sourceUrl ) { |
58 | $pageUrl = $sourceUrl->getUrl(); |
59 | |
60 | if ( array_key_exists( $pageUrl, $this->resultCache ) ) { |
61 | return $this->resultCache[$pageUrl]; |
62 | } |
63 | |
64 | $api = $this->actuallyGetApiUrl( $pageUrl ); |
65 | if ( $api ) { |
66 | $this->resultCache[$pageUrl] = $api; |
67 | return $api; |
68 | } |
69 | |
70 | $this->logger->error( 'Failed to get MediaWiki API from SourceUrl.' ); |
71 | throw new LocalizedImportException( 'fileimporter-mediawiki-api-notfound' ); |
72 | } |
73 | |
74 | /** |
75 | * Requests the page and extracts the API URL from its <link rel="EditURI"> element. |
76 | * |
77 | * @return string|null The EditURI href without "?action=rsd", expanded by wfExpandUrl() with |
78 | * PROTO_HTTPS, or null when the response is empty or has no EditURI link |
79 | * @throws ImportException when the request failed |
80 | */ |
81 | private function actuallyGetApiUrl( string $pageUrl ): ?string { |
82 | try { |
83 | $req = $this->httpRequestExecutor->execute( $pageUrl ); |
84 | } catch ( HttpRequestException $ex ) { |
85 | $statusCode = $ex->getHttpRequest()->getStatus(); |
86 | $errors = $ex->getStatusValue()->getErrors(); |
87 | $error = reset( $errors ); |
88 | |
89 | if ( $statusCode === 404 ) { |
90 | $msg = [ 'fileimporter-api-file-notfound', Message::plaintextParam( $pageUrl ) ]; |
91 | } else { |
92 | $msg = [ |
93 | 'fileimporter-api-failedtofindapi', |
94 | $pageUrl, |
95 | // Note: If a parameter to a Message is another Message, it will be forced to |
96 | // use the same language. |
97 | $statusCode !== 200 |
98 | ? wfMessage( 'fileimporter-http-statuscode', $statusCode ) |
99 | : '', |
100 | $error |
101 | ? wfMessage( $error['message'], $error['params'] ) |
102 | : '' |
103 | ]; |
104 | } |
105 | |
106 | $this->logger->error( 'Failed to discover API location from: ' . $pageUrl, [ |
107 | 'statusCode' => $statusCode, |
108 | 'previousMessage' => $error ? $error['message'] : '', |
109 | 'responseContent' => $ex->getHttpRequest()->getContent(), |
110 | ] ); |
111 | throw new LocalizedImportException( $msg ); |
112 | } |
113 | |
114 | $content = (string)$req->getContent(); |
115 | |
116 | // DOMDocument::loadHTML() throws a ValueError when given an empty string on PHP 8, so an |
117 | // empty response is handled like a page without an EditURI link. |
118 | if ( $content !== '' ) { |
119 | $document = new DOMDocument(); |
120 | |
121 | // Collect libxml parse errors internally (discarded below), then restore the old mode. |
122 | $oldXmlErrorUsage = libxml_use_internal_errors( true ); |
123 | $document->loadHTML( $content ); |
124 | libxml_clear_errors(); |
125 | libxml_use_internal_errors( $oldXmlErrorUsage ); |
126 | |
127 | foreach ( $document->getElementsByTagName( 'link' ) as $element ) { |
128 | /** @var DOMElement $element */ |
129 | if ( $element->getAttribute( 'rel' ) === 'EditURI' ) { |
130 | $editUri = $element->getAttribute( 'href' ); |
131 | $api = str_replace( '?action=rsd', '', $editUri ); |
132 | // Always prefer HTTPS because of (optional) edit/delete requests, see T228851 |
133 | return wfExpandUrl( $api, PROTO_HTTPS ); |
134 | } |
135 | } |
136 | } |
137 | |
138 | $this->logger->error( |
139 | 'Failed to discover API location from: "' . $pageUrl . '".', |
140 | [ |
141 | 'responseContent' => $content, |
142 | ] |
143 | ); |
144 | |
145 | return null; |
146 | } |
147 | |
148 | } |