Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
88.10% covered (warning)
88.10%
37 / 42
33.33% covered (danger)
33.33%
1 / 3
CRAP
0.00% covered (danger)
0.00%
0 / 1
HttpApiLookup
88.10% covered (warning)
88.10%
37 / 42
33.33% covered (danger)
33.33%
1 / 3
13.29
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setLogger
n/a
0 / 0
n/a
0 / 0
1
 getApiUrl
77.78% covered (warning)
77.78%
7 / 9
0.00% covered (danger)
0.00%
0 / 1
3.10
 actuallyGetApiUrl
90.32% covered (success)
90.32%
28 / 31
0.00% covered (danger)
0.00%
0 / 1
8.06
1<?php
2
3namespace FileImporter\Remote\MediaWiki;
4
5use DOMDocument;
6use DOMElement;
7use FileImporter\Data\SourceUrl;
8use FileImporter\Exceptions\HttpRequestException;
9use FileImporter\Exceptions\ImportException;
10use FileImporter\Exceptions\LocalizedImportException;
11use FileImporter\Services\Http\HttpRequestExecutor;
12use Message;
13use Psr\Log\LoggerAwareInterface;
14use Psr\Log\LoggerInterface;
15use Psr\Log\NullLogger;
16
17/**
18 * Lookup that can take a MediaWiki site URL and return the URL of the action API.
19 * This service caches APIs that have been found for the lifetime of the object.
20 *
21 * @license GPL-2.0-or-later
22 * @author Addshore
23 */
class HttpApiLookup implements LoggerAwareInterface {

    /**
     * @var LoggerInterface Receives details about failed lookups. Defaults to a NullLogger
     *  until setLogger() is called.
     */
    private $logger;

    /**
     * @var HttpRequestExecutor Used to fetch the HTML page the API location is read from.
     */
    private $httpRequestExecutor;

    /**
     * @var string[] url => apiUrl. Only successful lookups are stored here.
     */
    private $resultCache = [];

    /**
     * @param HttpRequestExecutor $httpRequestExecutor
     */
    public function __construct( HttpRequestExecutor $httpRequestExecutor ) {
        $this->httpRequestExecutor = $httpRequestExecutor;
        $this->logger = new NullLogger();
    }

    /**
     * @param LoggerInterface $logger
     * @codeCoverageIgnore
     */
    public function setLogger( LoggerInterface $logger ) {
        $this->logger = $logger;
    }

    /**
     * Returns the action API URL that belongs to the given page, using the per-object cache
     * when the same page URL was resolved before.
     *
     * @param SourceUrl $sourceUrl A URL that points to any editable HTML page in any MediaWiki
     *  wiki. The page is expected to contain a <link rel="EditURI" href="…"> element.
     *
     * @return string URL of api.php
     * @throws ImportException when the request failed, or when no API URL could be found in
     *  the response
     */
    public function getApiUrl( SourceUrl $sourceUrl ) {
        $pageUrl = $sourceUrl->getUrl();

        if ( array_key_exists( $pageUrl, $this->resultCache ) ) {
            return $this->resultCache[$pageUrl];
        }

        $api = $this->actuallyGetApiUrl( $pageUrl );
        if ( $api ) {
            // Failures are intentionally not cached, so a later call retries the lookup.
            $this->resultCache[$pageUrl] = $api;
            return $api;
        }

        $this->logger->error( 'Failed to get MediaWiki API from SourceUrl.' );
        throw new LocalizedImportException( 'fileimporter-mediawiki-api-notfound' );
    }

    /**
     * Fetches the page and tries to discover the API URL from its HTML.
     *
     * @param string $pageUrl
     *
     * @return string|null Null when the page was fetched but no usable API URL was found
     * @throws ImportException when the request failed
     */
    private function actuallyGetApiUrl( $pageUrl ) {
        try {
            $req = $this->httpRequestExecutor->execute( $pageUrl );
        } catch ( HttpRequestException $ex ) {
            $statusCode = $ex->getHttpRequest()->getStatus();
            $errors = $ex->getStatusValue()->getErrors();
            // reset() returns false for an empty list, which the checks below rely on.
            $error = reset( $errors );

            if ( $statusCode === 404 ) {
                $msg = [ 'fileimporter-api-file-notfound', Message::plaintextParam( $pageUrl ) ];
            } else {
                $msg = [
                    'fileimporter-api-failedtofindapi',
                    $pageUrl,
                    // Note: If a parameter to a Message is another Message, it will be forced to
                    // use the same language.
                    $statusCode !== 200
                        ? wfMessage( 'fileimporter-http-statuscode', $statusCode )
                        : '',
                    $error
                        ? wfMessage( $error['message'], $error['params'] )
                        : ''
                ];
            }

            $this->logger->error( 'Failed to discover API location from: ' . $pageUrl, [
                'statusCode' => $statusCode,
                'previousMessage' => $error ? $error['message'] : '',
                'responseContent' => $ex->getHttpRequest()->getContent(),
            ] );
            throw new LocalizedImportException( $msg );
        }

        $content = $req->getContent();
        $api = $this->extractApiUrlFromHtml( $content );
        if ( $api !== null ) {
            return $api;
        }

        $this->logger->error(
            'Failed to discover API location from: "' . $pageUrl . '".',
            [
                'responseContent' => $content,
            ]
        );

        return null;
    }

    /**
     * Looks for the first <link rel="EditURI" href="…"> element in the given HTML and turns its
     * target into an absolute HTTPS URL of the API.
     *
     * @param mixed $html Response body; anything but a non-empty string yields null
     *
     * @return string|null Null when there is no EditURI link or its target cannot be expanded
     */
    private function extractApiUrlFromHtml( $html ) {
        // DOMDocument::loadHTML() emits a warning (PHP 7) or throws a ValueError (PHP 8) when
        // given an empty string. An empty body can never contain the link we are looking for.
        if ( !is_string( $html ) || $html === '' ) {
            return null;
        }

        $document = new DOMDocument();

        // Real-world HTML is rarely valid XML; collect libxml errors internally instead of
        // emitting them, and always restore the previous setting, even if parsing throws.
        $oldXmlErrorUsage = libxml_use_internal_errors( true );
        try {
            $document->loadHTML( $html );
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors( $oldXmlErrorUsage );
        }

        foreach ( $document->getElementsByTagName( 'link' ) as $element ) {
            /** @var DOMElement $element */
            if ( $element->getAttribute( 'rel' ) === 'EditURI' ) {
                $editUri = $element->getAttribute( 'href' );
                $api = str_replace( '?action=rsd', '', $editUri );
                // Always prefer HTTPS because of (optional) edit/delete requests, see T228851
                $expanded = wfExpandUrl( $api, PROTO_HTTPS );
                // wfExpandUrl() may return false for a URL it cannot expand; report that as
                // "not found" so the documented string|null contract holds.
                return $expanded ?: null;
            }
        }

        return null;
    }

}