Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
82.35% |
42 / 51 |
|
33.33% |
1 / 3 |
CRAP | |
0.00% |
0 / 1 |
HttpApiLookup | |
82.35% |
42 / 51 |
|
33.33% |
1 / 3 |
13.93 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
setLogger | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
getApiUrl | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
3.10 | |||
actuallyGetApiUrl | |
82.50% |
33 / 40 |
|
0.00% |
0 / 1 |
8.34 |
1 | <?php |
2 | |
3 | namespace FileImporter\Remote\MediaWiki; |
4 | |
5 | use DOMDocument; |
6 | use DOMElement; |
7 | use FileImporter\Data\SourceUrl; |
8 | use FileImporter\Exceptions\HttpRequestException; |
9 | use FileImporter\Exceptions\ImportException; |
10 | use FileImporter\Exceptions\LocalizedImportException; |
11 | use FileImporter\Services\Http\HttpRequestExecutor; |
12 | use MediaWiki\Message\Message; |
13 | use Psr\Log\LoggerAwareInterface; |
14 | use Psr\Log\LoggerInterface; |
15 | use Psr\Log\NullLogger; |
16 | |
/**
 * Lookup that can take a MediaWiki site URL and return the URL of the action API.
 * This service caches APIs that have been found for the lifetime of the object.
 *
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class HttpApiLookup implements LoggerAwareInterface {

	private HttpRequestExecutor $httpRequestExecutor;
	private LoggerInterface $logger;

	/** @var string[] url => apiUrl */
	private array $resultCache = [];

	/**
	 * @param HttpRequestExecutor $httpRequestExecutor Executor used to fetch the source page
	 */
	public function __construct( HttpRequestExecutor $httpRequestExecutor ) {
		$this->httpRequestExecutor = $httpRequestExecutor;
		// Logging is a no-op until a real logger is injected via setLogger().
		$this->logger = new NullLogger();
	}

	/**
	 * @codeCoverageIgnore
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Resolves the action API URL for the given page, consulting the per-object cache first.
	 * Only successful lookups are cached; a failed lookup is retried on the next call.
	 *
	 * @param SourceUrl $sourceUrl A URL that points to any editable HTML page in any MediaWiki
	 *  wiki. The page is expected to contain a <link rel="EditURI" href="…"> element.
	 *
	 * @return string URL of api.php
	 * @throws ImportException when the request failed
	 */
	public function getApiUrl( SourceUrl $sourceUrl ) {
		$pageUrl = $sourceUrl->getUrl();

		if ( array_key_exists( $pageUrl, $this->resultCache ) ) {
			return $this->resultCache[$pageUrl];
		}

		$api = $this->actuallyGetApiUrl( $pageUrl );
		if ( $api ) {
			$this->resultCache[$pageUrl] = $api;
			return $api;
		}

		$this->logger->error( 'Failed to get MediaWiki API from SourceUrl.' );
		throw new LocalizedImportException( 'fileimporter-mediawiki-api-notfound' );
	}

	/**
	 * Fetches the page and looks for the API location advertised in its HTML.
	 *
	 * @param string $pageUrl URL of the page to fetch
	 *
	 * @return string|null URL of api.php, or null when the response does not advertise one
	 * @throws ImportException when the request failed
	 */
	private function actuallyGetApiUrl( string $pageUrl ): ?string {
		try {
			$req = $this->httpRequestExecutor->execute( $pageUrl );
		} catch ( HttpRequestException $ex ) {
			$statusCode = $ex->getHttpRequest()->getStatus();
			$error = $ex->getStatusValue()->getMessages()[0] ?? null;

			if ( $statusCode === 404 ) {
				$msg = [ 'fileimporter-api-file-notfound', Message::plaintextParam( $pageUrl ) ];
			} else {
				$msg = [
					'fileimporter-api-failedtofindapi',
					$pageUrl,
					// Note: If a parameter to a Message is another Message, it will be forced to
					// use the same language.
					$statusCode !== 200
						? wfMessage( 'fileimporter-http-statuscode', $statusCode )
						: '',
					$error
						? wfMessage( $error )
						: ''
				];
			}

			$this->logger->error( 'Failed to discover API location from: ' . $pageUrl, [
				'statusCode' => $statusCode,
				'previousMessage' => $error ? $error->getKey() : '',
				'responseContent' => $ex->getHttpRequest()->getContent(),
			] );
			throw new LocalizedImportException( $msg );
		}

		$content = $req->getContent();

		// DOMDocument::loadHTML() refuses an empty document (ValueError on PHP 8, a warning
		// before that). An empty body cannot advertise an API anyway, so skip parsing and
		// fall through to the regular "not found" handling below. The null check is purely
		// defensive; the return type of getContent() is not visible from this file.
		if ( $content !== null && $content !== '' ) {
			$api = $this->findApiUrlInHtml( $content );
			if ( $api !== null ) {
				return $api;
			}
		}

		$this->logger->error(
			'Failed to discover API location from: "' . $pageUrl . '".',
			[
				'responseContent' => $content,
			]
		);

		return null;
	}

	/**
	 * Parses the HTML and returns the API URL from the first <link rel="EditURI"> element.
	 *
	 * @param string $html Non-empty HTML document
	 *
	 * @return string|null Expanded API URL, or null when there is no EditURI link
	 */
	private function findApiUrlInHtml( string $html ): ?string {
		$document = new DOMDocument();

		// Real-world HTML is rarely well-formed; collect parser errors internally instead of
		// emitting warnings, and restore the previous setting even if parsing throws.
		$oldXmlErrorUsage = libxml_use_internal_errors( true );
		try {
			$document->loadHTML( $html );
		} finally {
			libxml_clear_errors();
			libxml_use_internal_errors( $oldXmlErrorUsage );
		}

		foreach ( $document->getElementsByTagName( 'link' ) as $element ) {
			/** @var DOMElement $element */
			if ( $element->getAttribute( 'rel' ) === 'EditURI' ) {
				$editUri = $element->getAttribute( 'href' );
				// The EditURI href carries an "?action=rsd" query; strip it to get the bare
				// api.php URL.
				$api = str_replace( '?action=rsd', '', $editUri );
				// Always prefer HTTPS because of (optional) edit/delete requests, see T228851
				return wfExpandUrl( $api, PROTO_HTTPS );
			}
		}

		return null;
	}

}