Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.95% |
306 / 378 |
|
29.41% |
5 / 17 |
CRAP | |
0.00% |
0 / 1 |
MultiHttpClient | |
81.17% |
306 / 377 |
|
29.41% |
5 / 17 |
193.30 | |
0.00% |
0 / 1 |
__construct | |
76.92% |
10 / 13 |
|
0.00% |
0 / 1 |
5.31 | |||
run | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
runMulti | |
71.43% |
15 / 21 |
|
0.00% |
0 / 1 |
13.82 | |||
isCurlEnabled | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
runMultiCurl | |
77.63% |
59 / 76 |
|
0.00% |
0 / 1 |
13.61 | |||
getCurlHandle | |
64.77% |
57 / 88 |
|
0.00% |
0 / 1 |
43.16 | |||
getCurlMulti | |
87.50% |
14 / 16 |
|
0.00% |
0 / 1 |
6.07 | |||
getCurlTime | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
runMultiHttp | |
98.00% |
49 / 50 |
|
0.00% |
0 / 1 |
10 | |||
normalizeHeaders | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
normalizeRequests | |
93.02% |
40 / 43 |
|
0.00% |
0 / 1 |
12.05 | |||
useReverseProxy | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
4.16 | |||
assembleUrl | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
10 | |||
isLocalURL | |
94.12% |
16 / 17 |
|
0.00% |
0 / 1 |
6.01 | |||
getSelectTimeout | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
3.10 | |||
setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__destruct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * HTTP service client |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | */ |
22 | |
23 | namespace Wikimedia\Http; |
24 | |
25 | use InvalidArgumentException; |
26 | use MediaWiki\MediaWikiServices; |
27 | use Psr\Log\LoggerAwareInterface; |
28 | use Psr\Log\LoggerInterface; |
29 | use Psr\Log\NullLogger; |
30 | use RuntimeException; |
31 | |
32 | /** |
33 | * Class to handle multiple HTTP requests |
34 | * |
35 | * If curl is available, requests will be made concurrently. |
36 | * Otherwise, they will be made serially. |
37 | * |
38 | * HTTP request maps are arrays that use the following format: |
39 | * - method : GET/HEAD/PUT/POST/DELETE |
40 | * - url : HTTP/HTTPS URL |
41 | * - query : <query parameter field/value associative array> (uses RFC 3986) |
42 | * - headers : <header name/value associative array> |
43 | * - body : source to get the HTTP request body from; |
44 | * this can simply be a string (always), a resource for |
45 | * PUT requests, and a field/value array for POST request; |
46 | * array bodies are encoded as multipart/form-data and strings |
47 | * use application/x-www-form-urlencoded (headers sent automatically) |
48 | * - stream : resource to stream the HTTP response body to |
49 | * - proxy : HTTP proxy to use |
50 | * - flags : map of boolean flags which supports: |
51 | * - relayResponseHeaders : write out header via header() |
52 | * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'. |
53 | * |
54 | * Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get |
55 | * a client object with appropriately configured timeouts. |
56 | * |
57 | * @since 1.23 |
58 | */ |
59 | class MultiHttpClient implements LoggerAwareInterface { |
60 | /** Regex for headers likely to contain tokens, etc. that we want to redact from logs */ |
61 | private const SENSITIVE_HEADERS = '/(^|-|_)(authorization|auth|password|cookie)($|-|_)/'; |
62 | /** |
63 | * @phpcs:ignore MediaWiki.Commenting.PropertyDocumentation.ObjectTypeHintVar |
64 | * @var resource|object|null curl_multi_init() handle, initialized in getCurlMulti() |
65 | */ |
66 | protected $cmh = null; |
67 | /** @var string|null SSL certificates path */ |
68 | protected $caBundlePath; |
69 | /** @var float */ |
70 | protected $connTimeout = 10; |
71 | /** @var float */ |
72 | protected $maxConnTimeout = INF; |
73 | /** @var float */ |
74 | protected $reqTimeout = 30; |
75 | /** @var float */ |
76 | protected $maxReqTimeout = INF; |
77 | /** @var bool */ |
78 | protected $usePipelining = false; |
79 | /** @var int */ |
80 | protected $maxConnsPerHost = 50; |
81 | /** @var string|null */ |
82 | protected $proxy; |
83 | /** @var string|false */ |
84 | protected $localProxy = false; |
85 | /** @var string[] */ |
86 | protected $localVirtualHosts = []; |
87 | /** @var string */ |
88 | protected $userAgent = 'wikimedia/multi-http-client v1.1'; |
89 | /** @var LoggerInterface */ |
90 | protected $logger; |
91 | protected array $headers = []; |
92 | |
93 | // In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect |
94 | // timeouts are periodically polled instead of being accurately respected. |
95 | // The select timeout is set to the minimum timeout multiplied by this factor. |
96 | private const TIMEOUT_ACCURACY_FACTOR = 0.1; |
97 | |
98 | private ?TelemetryHeadersInterface $telemetry = null; |
99 | |
100 | /** |
101 | * Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get |
102 | * a client object with appropriately configured timeouts instead of constructing |
103 | * a MultiHttpClient directly. |
104 | * |
105 | * @param array $options |
106 | * - connTimeout : default connection timeout (seconds) |
107 | * - reqTimeout : default request timeout (seconds) |
108 | * - maxConnTimeout : maximum connection timeout (seconds) |
109 | * - maxReqTimeout : maximum request timeout (seconds) |
110 | * - proxy : HTTP proxy to use |
111 | * - localProxy : Reverse proxy to use for domains in localVirtualHosts |
112 | * - localVirtualHosts : Domains that are configured as virtual hosts on the same machine |
113 | * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) |
114 | * - maxConnsPerHost : maximum number of concurrent connections (per host) |
115 | * - userAgent : The User-Agent header value to send |
116 | * - logger : a \Psr\Log\LoggerInterface instance for debug logging |
117 | * - caBundlePath : path to specific Certificate Authority bundle (if any) |
118 | * - headers : an array of default headers to send with every request |
119 | * - telemetry : a \Wikimedia\Http\TelemetryHeadersInterface instance supplying telemetry headers to send with every request |
120 | * @throws \Exception |
121 | */ |
public function __construct( array $options ) {
	// A configured CA bundle that does not exist is a setup error: fail at construction
	// time rather than on the first HTTPS request.
	$caBundle = $options['caBundlePath'] ?? null;
	if ( $caBundle !== null ) {
		$this->caBundlePath = $caBundle;
		if ( !file_exists( $this->caBundlePath ) ) {
			throw new InvalidArgumentException( "Cannot find CA bundle: " . $this->caBundlePath );
		}
	}

	// Each of these option names maps directly onto the property of the same name.
	// Options that are absent (or null) leave the property at its declared default.
	$propertyOptions = [
		'connTimeout', 'maxConnTimeout', 'reqTimeout', 'maxReqTimeout',
		'usePipelining', 'maxConnsPerHost', 'proxy', 'userAgent', 'logger',
		'localProxy', 'localVirtualHosts', 'headers', 'telemetry'
	];
	foreach ( $propertyOptions as $name ) {
		if ( !isset( $options[$name] ) ) {
			continue;
		}
		$this->$name = $options[$name];
	}

	// Guarantee a usable logger so the rest of the class can log unconditionally
	if ( $this->logger === null ) {
		$this->logger = new NullLogger;
	}
}
141 | |
142 | /** |
143 | * Execute an HTTP(S) request |
144 | * |
145 | * This method returns a response map of: |
146 | * - code : HTTP response code or 0 if there was a serious error |
147 | * - reason : HTTP response reason (empty if there was a serious error) |
148 | * - headers : <header name/value associative array> |
149 | * - body : HTTP response body or resource (if "stream" was set) |
150 | * - error : Any error string |
151 | * The map also stores integer-indexed copies of these values. This lets callers do: |
152 | * @code |
153 | * [ $rcode, $rdesc, $rhdrs, $rbody, $rerr ] = $http->run( $req ); |
154 | * @endcode |
155 | * @param array $req HTTP request array |
156 | * @param array $opts |
157 | * - connTimeout : connection timeout per request (seconds) |
158 | * - reqTimeout : post-connection timeout per request (seconds) |
159 | * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) |
160 | * - maxConnsPerHost : maximum number of concurrent connections (per host) |
161 | * - httpVersion : One of 'v1.0', 'v1.1', 'v2' or 'v2.0'. Leave empty to use |
162 | * PHP/curl's default |
163 | * @param string $caller The method making this request, for attribution in logs |
164 | * @return array Response array for request |
165 | */ |
public function run( array $req, array $opts = [], string $caller = __METHOD__ ) {
	// Run the request as a batch of one and hand back just its response map
	$results = $this->runMulti( [ $req ], $opts, $caller );

	return $results[0]['response'];
}
169 | |
170 | /** |
171 | * Execute a set of HTTP(S) requests. |
172 | * |
173 | * If curl is available, requests will be made concurrently. |
174 | * Otherwise, they will be made serially. |
175 | * |
176 | * The maps are returned by this method with the 'response' field set to a map of: |
177 | * - code : HTTP response code or 0 if there was a serious error |
178 | * - reason : HTTP response reason (empty if there was a serious error) |
179 | * - headers : <header name/value associative array> |
180 | * - body : HTTP response body or resource (if "stream" was set) |
181 | * - error : Any error string |
182 | * The map also stores integer-indexed copies of these values. This lets callers do: |
183 | * @code |
184 | * [ $rcode, $rdesc, $rhdrs, $rbody, $rerr ] = $req['response']; |
185 | * @endcode |
186 | * All headers in the 'headers' field are normalized to use lower case names. |
187 | * This is true for the request headers and the response headers. Integer-indexed |
188 | * method/URL entries will also be changed to use the corresponding string keys. |
189 | * |
190 | * @param array[] $reqs Map of HTTP request arrays |
191 | * @param array $opts Options |
192 | * - connTimeout : connection timeout per request (seconds) |
193 | * - reqTimeout : post-connection timeout per request (seconds) |
194 | * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) |
195 | * - maxConnsPerHost : maximum number of concurrent connections (per host) |
196 | * - httpVersion : One of 'v1.0', 'v1.1', 'v2' or 'v2.0'. Leave empty to use |
197 | * PHP/curl's default |
198 | * @param string $caller The method making these requests, for attribution in logs |
199 | * @return array[] $reqs With response array populated for each |
200 | * @throws \Exception |
201 | */ |
public function runMulti( array $reqs, array $opts = [], string $caller = __METHOD__ ) {
	$this->normalizeRequests( $reqs );

	// Apply the client-wide default timeouts, then cap each one at its configured maximum
	$opts += [ 'connTimeout' => $this->connTimeout, 'reqTimeout' => $this->reqTimeout ];
	$limits = [
		'connTimeout' => $this->maxConnTimeout,
		'reqTimeout' => $this->maxReqTimeout,
	];
	foreach ( $limits as $key => $limit ) {
		if ( $limit && $opts[$key] > $limit ) {
			$opts[$key] = $limit;
		}
	}

	if ( !$this->isCurlEnabled() ) {
		# TODO: Add handling for httpVersion option
		return $this->runMultiHttp( $reqs, $opts );
	}

	// Translate the symbolic HTTP version into the matching cURL constant.
	// Loose comparison is deliberate: it mirrors the semantics of a switch statement.
	$requested = $opts['httpVersion'] ?? null;
	if ( $requested == 'v1.0' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_1_0;
	} elseif ( $requested == 'v1.1' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_1_1;
	} elseif ( $requested == 'v2' || $requested == 'v2.0' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_2_0;
	} else {
		// Unset or unrecognised: let cURL pick the version
		$opts['httpVersion'] = CURL_HTTP_VERSION_NONE;
	}

	return $this->runMultiCurl( $reqs, $opts, $caller );
}
234 | |
235 | /** |
236 | * Determines if the curl extension is available |
237 | * |
238 | * @return bool true if curl is available, false otherwise. |
239 | */ |
protected function isCurlEnabled() {
	if ( !extension_loaded( 'curl' ) ) {
		return false;
	}

	// The extension being loaded is not enough: some hosts ship a modified curl with
	// the curl_multi_* functions removed, so check for them explicitly.
	return function_exists( 'curl_multi_init' );
}
245 | |
246 | /** |
247 | * Execute a set of HTTP(S) requests concurrently |
248 | * |
249 | * @see MultiHttpClient::runMulti() |
250 | * |
251 | * @param array[] $reqs Map of HTTP request arrays |
252 | * @param array $opts |
253 | * - connTimeout : connection timeout per request (seconds) |
254 | * - reqTimeout : post-connection timeout per request (seconds) |
255 | * - usePipelining : whether to use HTTP pipelining if possible |
256 | * - maxConnsPerHost : maximum number of concurrent connections (per host) |
257 | * - httpVersion : HTTP version to use |
258 | * @phan-param array{connTimeout?:int,reqTimeout?:int,usePipelining?:bool,maxConnsPerHost?:int} $opts |
259 | * @param string $caller The method making these requests, for attribution in logs |
260 | * @return array $reqs With response array populated for each |
261 | * @throws \Exception |
262 | * @suppress PhanTypeInvalidDimOffset |
263 | */ |
private function runMultiCurl( array $reqs, array $opts, string $caller = __METHOD__ ) {
	$chm = $this->getCurlMulti( $opts );

	$selectTimeout = $this->getSelectTimeout( $opts );

	// Build every cURL handle before attaching any of them to the multi handle.
	// getCurlHandle() throws on invalid requests, and the multi handle is kept on
	// $this->cmh and reused by later batches. Attaching handles one by one would leave
	// the already-attached ones on the multi handle when a later request is rejected,
	// so they would be executed as part of the next batch.
	$handles = [];
	foreach ( $reqs as $index => &$req ) {
		$handles[$index] = $this->getCurlHandle( $req, $opts );
	}
	unset( $req ); // don't assign over this by accident
	foreach ( $handles as $handle ) {
		curl_multi_add_handle( $chm, $handle );
	}

	$infos = [];
	// Execute the cURL handles concurrently...
	$active = null; // handles still being processed
	do {
		// Do any available work...
		$mrc = curl_multi_exec( $chm, $active );

		if ( $mrc !== CURLM_OK ) {
			$error = curl_multi_strerror( $mrc );
			$this->logger->error( 'curl_multi_exec() failed: {error}', [
				'error' => $error,
				'exception' => new RuntimeException(),
				'method' => $caller,
			] );
			break;
		}

		// Wait (if possible) for available work...
		if ( $active > 0 && curl_multi_select( $chm, $selectTimeout ) === -1 ) {
			$errno = curl_multi_errno( $chm );
			$error = curl_multi_strerror( $errno );
			$this->logger->error( 'curl_multi_select() failed: {error}', [
				'error' => $error,
				'exception' => new RuntimeException(),
				'method' => $caller,
			] );
		}
	} while ( $active > 0 );

	// Collect the completion message of every finished transfer, keyed by handle ID
	$queuedMessages = null;
	do {
		$info = curl_multi_info_read( $chm, $queuedMessages );
		if ( $info !== false && $info['msg'] === CURLMSG_DONE ) {
			// Note: cast to integer even works on PHP 8.0+ despite the
			// handle being an object not a resource, because CurlHandle
			// has a backwards-compatible cast_object handler.
			$infos[(int)$info['handle']] = $info;
		}
	} while ( $queuedMessages > 0 );

	// Remove all of the added cURL handles and check for errors...
	foreach ( $reqs as $index => &$req ) {
		$ch = $handles[$index];
		curl_multi_remove_handle( $chm, $ch );

		if ( isset( $infos[(int)$ch] ) ) {
			$info = $infos[(int)$ch];
			$errno = $info['result'];
			if ( $errno !== 0 ) {
				$req['response']['error'] = "(curl error: $errno)";
				if ( function_exists( 'curl_strerror' ) ) {
					$req['response']['error'] .= " " . curl_strerror( $errno );
				}
				$this->logger->error( 'Error fetching URL "{url}": {error}', [
					'url' => $req['url'],
					'error' => $req['response']['error'],
					'exception' => new RuntimeException(),
					'method' => $caller,
				] );
			} else {
				$this->logger->debug(
					"HTTP complete: {method} {url} code={response_code} size={size} " .
					"total={total_time} connect={connect_time}",
					[
						'method' => $req['method'],
						'url' => $req['url'],
						'response_code' => $req['response']['code'],
						'size' => curl_getinfo( $ch, CURLINFO_SIZE_DOWNLOAD ),
						'total_time' => $this->getCurlTime(
							$ch, CURLINFO_TOTAL_TIME, 'CURLINFO_TOTAL_TIME_T'
						),
						'connect_time' => $this->getCurlTime(
							$ch, CURLINFO_CONNECT_TIME, 'CURLINFO_CONNECT_TIME_T'
						),
					]
				);
			}
		} else {
			// No CURLMSG_DONE message was read for this handle
			$req['response']['error'] = "(curl error: no status set)";
		}

		// For convenience with array destructuring
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
		curl_close( $ch );
		// Close any string wrapper file handles
		if ( isset( $req['_closeHandle'] ) ) {
			fclose( $req['_closeHandle'] );
			unset( $req['_closeHandle'] );
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
375 | |
376 | /** |
377 | * @param array &$req HTTP request map |
378 | * @phpcs:ignore Generic.Files.LineLength |
379 | * @phan-param array{url:string,proxy?:?string,query:mixed,method:string,body:string|resource,headers:array<string,string>,stream?:resource,flags:array} $req |
380 | * @param array $opts |
381 | * - connTimeout : default connection timeout |
382 | * - reqTimeout : default request timeout |
383 | * - httpVersion: default HTTP version |
384 | * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintReturn |
385 | * @return resource|object |
386 | * @throws \Exception |
387 | */ |
protected function getCurlHandle( array &$req, array $opts ) {
	$ch = curl_init();

	curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy );
	// Timeouts are configured in (possibly fractional) seconds; cURL takes milliseconds
	curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, intval( $opts['connTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_TIMEOUT_MS, intval( $opts['reqTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
	curl_setopt( $ch, CURLOPT_HEADER, 0 );
	if ( $this->caBundlePath !== null ) {
		curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
		curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
	}
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

	// Append the 'query' map to the URL, respecting any query string already present
	$url = $req['url'];
	$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
	if ( $query != '' ) {
		$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
	}
	curl_setopt( $ch, CURLOPT_URL, $url );
	curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
	curl_setopt( $ch, CURLOPT_NOBODY, ( $req['method'] === 'HEAD' ) );
	curl_setopt( $ch, CURLOPT_HTTP_VERSION, $opts['httpVersion'] ?? CURL_HTTP_VERSION_NONE );

	if ( $req['method'] === 'PUT' ) {
		curl_setopt( $ch, CURLOPT_PUT, 1 );
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) ) {
			curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
			// The HTTP transfer coding is named "chunked". The historical spelling
			// "chunks" is still accepted so that existing callers keep working.
			$transferEncoding = strtolower(
				trim( (string)( $req['headers']['transfer-encoding'] ?? '' ) )
			);
			if ( isset( $req['headers']['content-length'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
			} elseif ( $transferEncoding === 'chunked' || $transferEncoding === 'chunks' ) {
				curl_setopt( $ch, CURLOPT_UPLOAD, true );
			} else {
				throw new InvalidArgumentException( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
			}
		} elseif ( $req['body'] !== '' ) {
			// Wrap the string body in a temporary stream so it can be fed through
			// CURLOPT_INFILE like a resource body
			$fp = fopen( "php://temp", "wb+" );
			fwrite( $fp, $req['body'], strlen( $req['body'] ) );
			rewind( $fp );
			curl_setopt( $ch, CURLOPT_INFILE, $fp );
			curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
			$req['_closeHandle'] = $fp; // remember to close this later
		} else {
			curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
		}
		curl_setopt( $ch, CURLOPT_READFUNCTION,
			static function ( $ch, $fd, $length ) {
				return (string)fread( $fd, $length );
			}
		);
	} elseif ( $req['method'] === 'POST' ) {
		curl_setopt( $ch, CURLOPT_POST, 1 );
		curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
	} else {
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
			throw new InvalidArgumentException( "HTTP body specified for a non PUT/POST request." );
		}
		$req['headers']['content-length'] = 0;
	}

	if ( !isset( $req['headers']['user-agent'] ) ) {
		$req['headers']['user-agent'] = $this->userAgent;
	}

	// Flatten the header map into the "name: value" lines cURL expects
	$headers = [];
	foreach ( $req['headers'] as $name => $value ) {
		if ( strpos( $name, ':' ) !== false ) {
			throw new InvalidArgumentException( "Header name must not contain colon-space." );
		}
		$headers[] = $name . ': ' . trim( $value );
	}
	curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

	// Response header lines are parsed into $req['response'] as they arrive.
	// $req is captured by reference so the caller's request map is updated in place.
	curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
		static function ( $ch, $header ) use ( &$req ) {
			if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) {
				header( $header );
			}
			$length = strlen( $header );
			$matches = [];
			if ( preg_match( "/^(HTTP\/(?:1\.[01]|2)) (\d{3}) (.*)/", $header, $matches ) ) {
				$req['response']['code'] = (int)$matches[2];
				$req['response']['reason'] = trim( $matches[3] );
				// After a redirect we will receive this again, but we already stored headers
				// that belonged to a redirect response. Start over.
				$req['response']['headers'] = [];
				return $length;
			}
			if ( strpos( $header, ":" ) === false ) {
				return $length;
			}
			[ $name, $value ] = explode( ":", $header, 2 );
			$name = strtolower( $name );
			$value = trim( $value );
			// Repeated headers are folded into one comma-separated value
			if ( isset( $req['response']['headers'][$name] ) ) {
				$req['response']['headers'][$name] .= ', ' . $value;
			} else {
				$req['response']['headers'][$name] = $value;
			}
			return $length;
		}
	);

	// This works with both file and php://temp handles (unlike CURLOPT_FILE)
	$hasOutputStream = isset( $req['stream'] );
	curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
		static function ( $ch, $data ) use ( &$req, $hasOutputStream ) {
			if ( $hasOutputStream ) {
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				return fwrite( $req['stream'], $data );
			} else {
				// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$req['response']['body'] .= $data;

				return strlen( $data );
			}
		}
	);

	return $ch;
}
514 | |
515 | /** |
516 | * @param array $opts |
517 | * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintReturn |
518 | * @return resource|object |
519 | * @throws \Exception |
520 | */ |
protected function getCurlMulti( array $opts ) {
	// Create the multi handle on first use and keep it for later calls
	if ( !$this->cmh ) {
		$multiHandle = curl_multi_init();
		// Limit the size of the idle connection cache such that consecutive parallel
		// request batches to the same host can avoid having to keep making connections
		curl_multi_setopt( $multiHandle, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
		$this->cmh = $multiHandle;
	}

	$installedCurl = curl_version()['version'];

	// CURLMOPT_MAX_HOST_CONNECTIONS is available since PHP 7.0.7 and cURL 7.30.0
	if ( version_compare( $installedCurl, '7.30.0', '>=' ) ) {
		// Limit the number of in-flight requests for any given host
		$perHostLimit = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;
		curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$perHostLimit );
	}

	$wantPipelining = $opts['usePipelining'] ?? $this->usePipelining;
	if ( $wantPipelining ) {
		// The accepted value of CURLMOPT_PIPELINING depends on the cURL version
		if ( version_compare( $installedCurl, '7.62', '>=' ) ) {
			// The option is a bitfield but HTTP/1.x pipelining has been removed
			$pipelineMode = CURLPIPE_MULTIPLEX;
		} elseif ( version_compare( $installedCurl, '7.43', '>=' ) ) {
			// The option is a bitfield and HTTP/1.x pipelining is supported
			$pipelineMode = CURLPIPE_HTTP1 | CURLPIPE_MULTIPLEX;
		} else {
			// The option is a boolean
			$pipelineMode = 1;
		}
		// Suppress deprecation, we know already (T264735)
		// phpcs:ignore Generic.PHP.NoSilencedErrors
		@curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, $pipelineMode );
	}

	return $this->cmh;
}
557 | |
558 | /** |
559 | * Get a time in seconds, formatted with microsecond resolution, or fall back to second |
560 | * resolution on PHP 7.2 |
561 | * |
562 | * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintParam |
563 | * @param resource|object $ch |
564 | * @param int $oldOption |
565 | * @param string $newConstName |
566 | * @return string |
567 | */ |
private function getCurlTime( $ch, $oldOption, $newConstName ): string {
	// Without the newer constant, fall back to the old option and return its value as-is
	if ( !defined( $newConstName ) ) {
		return (string)curl_getinfo( $ch, $oldOption );
	}

	// The value is divided by 1e6 to express it in seconds with six decimals
	$rawTime = curl_getinfo( $ch, constant( $newConstName ) );

	return sprintf( "%.6F", $rawTime / 1e6 );
}
575 | |
576 | /** |
577 | * Execute a set of HTTP(S) requests sequentially. |
578 | * |
579 | * @see MultiHttpClient::runMulti() |
580 | * @todo Remove dependency on MediaWikiServices: rewrite using Guzzle T202352 |
581 | * @param array $reqs Map of HTTP request arrays |
582 | * @phpcs:ignore Generic.Files.LineLength |
583 | * @phan-param array<int,array{url:string,query:array,method:string,body:string,headers:array<string,string>,proxy?:?string}> $reqs |
584 | * @param array $opts |
585 | * - connTimeout : connection timeout per request (seconds) |
586 | * - reqTimeout : post-connection timeout per request (seconds) |
587 | * @phan-param array{connTimeout:int,reqTimeout:int} $opts |
588 | * @return array $reqs With response array populated for each |
589 | * @throws \Exception |
590 | */ |
private function runMultiHttp( array $reqs, array $opts = [] ) {
	// Options that are identical for every request in the batch
	$sharedOptions = [
		'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
		'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout,
		'logger' => $this->logger,
		'caInfo' => $this->caBundlePath,
	];

	foreach ( $reqs as &$req ) {
		$requestOptions = array_merge( $sharedOptions, [
			'method' => $req['method'],
			'proxy' => $req['proxy'] ?? $this->proxy,
			'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent,
			'postData' => $req['body'],
		] );

		// Append the 'query' map to the URL, respecting any query string already present
		$queryString = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		$fullUrl = $req['url'];
		if ( $queryString != '' ) {
			$separator = strpos( $req['url'], '?' ) === false ? '?' : '&';
			$fullUrl .= $separator . $queryString;
		}

		$httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
			$fullUrl, $requestOptions, __METHOD__ );
		$httpRequest->setLogger( $this->logger );
		foreach ( $req['headers'] as $headerName => $headerValue ) {
			$httpRequest->setHeader( $headerName, $headerValue );
		}
		$status = $httpRequest->execute()->getStatusValue();

		// Each response header arrives as a list of values; join them into one string
		$flatHeaders = array_map(
			static fn ( $values ) => implode( ', ', $values ),
			$httpRequest->getResponseHeaders()
		);

		$req['response'] = [
			'code' => $httpRequest->getStatus(),
			'reason' => '',
			'headers' => $flatHeaders,
			'body' => $httpRequest->getContent(),
			'error' => '',
		];

		// On failure, report the first error; its parameters (if any) supply the reason
		$firstError = $status->isOK() ? null : ( $status->getErrors()[0] ?? null );
		if ( $firstError !== null ) {
			$req['response']['error'] = $firstError['message'];

			// param values vary per failure type (ex. unknown host vs unknown page)
			$leadParam = $firstError['params'][0] ?? null;
			if ( $leadParam !== null ) {
				if ( !is_numeric( $leadParam ) ) {
					$req['response']['reason'] = $leadParam;
				} elseif ( isset( $firstError['params'][1] ) ) {
					// @phan-suppress-next-line PhanTypeInvalidDimOffset
					$req['response']['reason'] = $firstError['params'][1];
				}
			}
		}

		// Integer-indexed copies, for convenience with array destructuring
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
	}

	return $reqs;
}
662 | |
663 | /** |
664 | * Normalize headers array |
665 | * @param array $headers |
666 | * @return array |
667 | */ |
private function normalizeHeaders( array $headers ): array {
	// Lower-case every header name. When two names collide after lower-casing,
	// the value that appears later in the input wins.
	$lowerNames = array_map( 'strtolower', array_keys( $headers ) );

	return array_combine( $lowerNames, $headers );
}
675 | |
676 | /** |
677 | * Normalize request information |
678 | * |
679 | * @param array[] &$reqs the requests to normalize |
680 | */ |
private function normalizeRequests( array &$reqs ) {
	foreach ( $reqs as &$request ) {
		// Every request starts out with an empty response map
		$request['response'] = [
			'code' => 0,
			'reason' => '',
			'headers' => [],
			'body' => '',
			'error' => ''
		];

		// Convert the short-form positional entries (0 = method, 1 = url) to named keys
		foreach ( [ 0 => 'method', 1 => 'url' ] as $position => $field ) {
			if ( isset( $request[$position] ) ) {
				$request[$field] = $request[$position];
				unset( $request[$position] );
			}
		}

		if ( !isset( $request['method'] ) ) {
			throw new InvalidArgumentException( "Request has no 'method' field set." );
		}
		if ( !isset( $request['url'] ) ) {
			throw new InvalidArgumentException( "Request has no 'url' field set." );
		}

		// Requests to local virtual hosts are rerouted through the local reverse proxy
		$viaLocalProxy = $this->localProxy !== false && $this->isLocalURL( $request['url'] );
		if ( $viaLocalProxy ) {
			$this->useReverseProxy( $request, $this->localProxy );
		}

		if ( !isset( $request['query'] ) ) {
			$request['query'] = [];
		}

		// Later sources override earlier ones: client defaults, telemetry, then the request
		$telemetryHeaders = $this->telemetry ? $this->telemetry->getRequestHeaders() : [];
		$mergedHeaders = array_merge(
			$this->headers,
			$telemetryHeaders,
			$request['headers'] ?? []
		);
		$request['headers'] = $this->normalizeHeaders( $mergedHeaders );

		if ( !isset( $request['body'] ) ) {
			$request['body'] = '';
			$request['headers']['content-length'] = 0;
		}

		// Redact some headers we know to have tokens before logging them
		$logHeaders = $request['headers'];
		foreach ( array_keys( $logHeaders ) as $headerName ) {
			if ( preg_match( self::SENSITIVE_HEADERS, $headerName ) === 1 ) {
				$logHeaders[$headerName] = '[redacted]';
			}
		}
		$this->logger->debug( "HTTP start: {method} {url}",
			[
				'method' => $request['method'],
				'url' => $request['url'],
				'headers' => $logHeaders,
			]
		);

		if ( !isset( $request['flags'] ) ) {
			$request['flags'] = [];
		}
	}
}
736 | |
/**
 * Redirect a request to a reverse proxy
 *
 * The request is modified in place: the original host is moved into a Host header,
 * the scheme, host and port of the URL are replaced by those of the proxy, and the
 * 'proxy' option is set to false.
 *
 * @param array &$req The request; must have a 'url' field
 * @param string $proxy URL of the reverse proxy; must contain a scheme and a host
 * @throws InvalidArgumentException if the proxy has no scheme or host, or if the
 *   request URL cannot be parsed or has no host
 */
private function useReverseProxy( array &$req, string $proxy ) {
	$parsedProxy = parse_url( $proxy );
	// parse_url() may succeed without yielding a scheme or host (e.g. for a bare path);
	// such a value cannot serve as a rewrite target.
	if ( $parsedProxy === false || !isset( $parsedProxy['scheme'], $parsedProxy['host'] ) ) {
		throw new InvalidArgumentException( "Invalid reverseProxy configured: $proxy" );
	}
	$parsedUrl = parse_url( $req['url'] );
	if ( $parsedUrl === false || !isset( $parsedUrl['host'] ) ) {
		throw new InvalidArgumentException( "Invalid url specified: {$req['url']}" );
	}
	// Set the current host in the Host header
	$req['headers']['Host'] = $parsedUrl['host'];
	// Replace scheme, host and port in the request
	$parsedUrl['scheme'] = $parsedProxy['scheme'];
	$parsedUrl['host'] = $parsedProxy['host'];
	if ( isset( $parsedProxy['port'] ) ) {
		$parsedUrl['port'] = $parsedProxy['port'];
	} else {
		// Do not carry the port of the original URL over to the proxy
		unset( $parsedUrl['port'] );
	}
	$req['url'] = self::assembleUrl( $parsedUrl );
	// Explicitly disable use of another proxy by setting to false,
	// since null will fallback to $this->proxy
	$req['proxy'] = false;
}
764 | |
/**
 * Build a URL string from the components returned by parse_url().
 *
 * Derived from MediaWiki\Utils\UrlUtils::assemble, but adapted to the output of
 * parse_url(), so the "://" delimiter is hardcoded.
 *
 * Layout of the result:
 * [scheme://][[user][:pass]@][host][:port][path][?query][#fragment]
 *
 * User, password and port are only emitted when a host is present, and an empty
 * query string is omitted.
 *
 * @param array $urlParts URL parts, as output from parse_url()
 * @return string URL assembled from its component parts
 */
private static function assembleUrl( array $urlParts ): string {
	$url = '';
	if ( isset( $urlParts['scheme'] ) ) {
		$url = $urlParts['scheme'] . '://';
	}

	if ( isset( $urlParts['host'] ) ) {
		$credentials = '';
		if ( isset( $urlParts['user'] ) ) {
			$password = isset( $urlParts['pass'] ) ? ':' . $urlParts['pass'] : '';
			$credentials = $urlParts['user'] . $password . '@';
		}
		$port = isset( $urlParts['port'] ) ? ':' . $urlParts['port'] : '';
		$url .= $credentials . $urlParts['host'] . $port;
	}

	$url .= $urlParts['path'] ?? '';

	$query = $urlParts['query'] ?? '';
	if ( $query !== '' ) {
		$url .= '?' . $query;
	}

	if ( isset( $urlParts['fragment'] ) ) {
		$url .= '#' . $urlParts['fragment'];
	}

	return $url;
}
808 | |
/**
 * Check if the URL can be served by localhost
 *
 * The host of the URL, and each of its parent domains, is compared against the
 * configured local virtual hosts. Only http and https URLs whose host is followed
 * by "/" or ":" are recognized.
 *
 * @note this is mostly a copy of MWHttpRequest::isLocalURL()
 * @param string $url Full url to check
 * @return bool
 */
private function isLocalURL( $url ) {
	if ( !$this->localVirtualHosts ) {
		// Shortcut
		return false;
	}

	// Extract host part
	$matches = [];
	if ( !preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
		return false;
	}

	// Walk from the top-level domain downwards ("org", "example.org", "en.example.org")
	// and check if this domain or any superdomain is listed as a local virtual host
	$domain = '';
	foreach ( array_reverse( explode( '.', $matches[1] ) ) as $i => $domainPart ) {
		$domain = $i === 0 ? $domainPart : $domainPart . '.' . $domain;

		// Strict comparison: a loose one would compare numeric-looking strings
		// numerically, so that e.g. "1" would match a configured "1.0"
		if ( in_array( $domain, $this->localVirtualHosts, true ) ) {
			return true;
		}
	}

	return false;
}
849 | |
/**
 * Work out how long a select call may block for the given options.
 *
 * The 'connTimeout' and 'reqTimeout' options are used, falling back to the defaults
 * of this client when an option is not set. The smallest non-empty timeout is
 * scaled by TIMEOUT_ACCURACY_FACTOR.
 *
 * @param array $opts
 * @return float
 */
private function getSelectTimeout( $opts ) {
	$candidates = [
		$opts['connTimeout'] ?? $this->connTimeout,
		$opts['reqTimeout'] ?? $this->reqTimeout,
	];
	// array_filter() without a callback discards empty (e.g. zero or null) timeouts
	$effective = array_filter( $candidates );
	if ( !$effective ) {
		return 1;
	}

	// Minimum 10us
	return max( min( $effective ) * self::TIMEOUT_ACCURACY_FACTOR, 10e-6 );
}
871 | |
/**
 * Set the logger that this client writes its messages to
 *
 * @param LoggerInterface $logger Replaces the logger currently in use
 */
public function setLogger( LoggerInterface $logger ): void {
	$this->logger = $logger;
}
878 | |
/**
 * Close the curl multi handle, if one is set, when the client is destroyed
 */
public function __destruct() {
	if ( !$this->cmh ) {
		// No handle is set, so there is nothing to release
		return;
	}
	curl_multi_close( $this->cmh );
	$this->cmh = null;
}
885 | |
886 | } |
887 | /** @deprecated class alias since 1.43 */ |
888 | class_alias( MultiHttpClient::class, 'MultiHttpClient' ); |