Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
58.77% |
211 / 359 |
|
23.53% |
4 / 17 |
CRAP | |
0.00% |
0 / 1 |
MultiHttpClient | |
58.94% |
211 / 358 |
|
23.53% |
4 / 17 |
964.00 | |
0.00% |
0 / 1 |
__construct | |
76.92% |
10 / 13 |
|
0.00% |
0 / 1 |
5.31 | |||
run | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
runMulti | |
38.10% |
8 / 21 |
|
0.00% |
0 / 1 |
39.70 | |||
isCurlEnabled | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
runMultiCurl | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
156 | |||
getCurlHandle | |
43.18% |
38 / 88 |
|
0.00% |
0 / 1 |
110.78 | |||
getCurlMulti | |
87.50% |
14 / 16 |
|
0.00% |
0 / 1 |
6.07 | |||
getCurlTime | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
runMultiHttp | |
98.00% |
49 / 50 |
|
0.00% |
0 / 1 |
10 | |||
normalizeHeaders | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
normalizeRequests | |
93.02% |
40 / 43 |
|
0.00% |
0 / 1 |
12.05 | |||
useReverseProxy | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
4.16 | |||
assembleUrl | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
10 | |||
isLocalURL | |
94.12% |
16 / 17 |
|
0.00% |
0 / 1 |
6.01 | |||
getSelectTimeout | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__destruct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * HTTP service client |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | */ |
22 | |
23 | namespace Wikimedia\Http; |
24 | |
25 | use InvalidArgumentException; |
26 | use MediaWiki\MediaWikiServices; |
27 | use Psr\Log\LoggerAwareInterface; |
28 | use Psr\Log\LoggerInterface; |
29 | use Psr\Log\NullLogger; |
30 | |
31 | /** |
32 | * Class to handle multiple HTTP requests |
33 | * |
34 | * If curl is available, requests will be made concurrently. |
35 | * Otherwise, they will be made serially. |
36 | * |
37 | * HTTP request maps are arrays that use the following format: |
38 | * - method : GET/HEAD/PUT/POST/DELETE |
39 | * - url : HTTP/HTTPS URL |
40 | * - query : <query parameter field/value associative array> (uses RFC 3986) |
41 | * - headers : <header name/value associative array> |
42 | * - body : source to get the HTTP request body from; |
43 | * this can simply be a string (always), a resource for |
44 | * PUT requests, and a field/value array for POST request; |
45 | * array bodies are encoded as multipart/form-data and strings |
46 | * use application/x-www-form-urlencoded (headers sent automatically) |
47 | * - stream : resource to stream the HTTP response body to |
48 | * - proxy : HTTP proxy to use |
49 | * - flags : map of boolean flags which supports: |
50 | * - relayResponseHeaders : write out header via header() |
51 | * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'. |
52 | * |
53 | * Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get |
54 | * a client object with appropriately configured timeouts. |
55 | * |
56 | * @since 1.23 |
57 | */ |
58 | class MultiHttpClient implements LoggerAwareInterface { |
59 | /** Regex for headers likely to contain tokens, etc. that we want to redact from logs */ |
60 | private const SENSITIVE_HEADERS = '/(^|-|_)(authorization|auth|password|cookie)($|-|_)/'; |
61 | /** |
62 | * @phpcs:ignore MediaWiki.Commenting.PropertyDocumentation.ObjectTypeHintVar |
63 | * @var resource|object|null curl_multi_init() handle, initialized in getCurlMulti() |
64 | */ |
65 | protected $cmh = null; |
66 | /** @var string|null SSL certificates path */ |
67 | protected $caBundlePath; |
68 | /** @var float */ |
69 | protected $connTimeout = 10; |
70 | /** @var float */ |
71 | protected $maxConnTimeout = INF; |
72 | /** @var float */ |
73 | protected $reqTimeout = 30; |
74 | /** @var float */ |
75 | protected $maxReqTimeout = INF; |
76 | /** @var bool */ |
77 | protected $usePipelining = false; |
78 | /** @var int */ |
79 | protected $maxConnsPerHost = 50; |
80 | /** @var string|null */ |
81 | protected $proxy; |
82 | /** @var string|false */ |
83 | protected $localProxy = false; |
84 | /** @var string[] */ |
85 | protected $localVirtualHosts = []; |
86 | /** @var string */ |
87 | protected $userAgent = 'wikimedia/multi-http-client v1.1'; |
88 | /** @var LoggerInterface */ |
89 | protected $logger; |
90 | /** @var array */ |
91 | protected array $headers = []; |
92 | |
93 | // In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect |
94 | // timeouts are periodically polled instead of being accurately respected. |
95 | // The select timeout is set to the minimum timeout multiplied by this factor. |
96 | private const TIMEOUT_ACCURACY_FACTOR = 0.1; |
97 | |
98 | private ?TelemetryHeadersInterface $telemetry = null; |
99 | |
/**
 * Build a client from a map of options.
 *
 * Since 1.35, callers should obtain a client with appropriately configured timeouts
 * from HttpRequestFactory::createMultiClient() rather than constructing a
 * MultiHttpClient directly.
 *
 * Options that are missing (or null) leave the corresponding property at its default.
 *
 * @param array $options
 *   - connTimeout       : default connection timeout (seconds)
 *   - reqTimeout        : default request timeout (seconds)
 *   - maxConnTimeout    : maximum connection timeout (seconds)
 *   - maxReqTimeout     : maximum request timeout (seconds)
 *   - proxy             : HTTP proxy to use
 *   - localProxy        : Reverse proxy to use for domains in localVirtualHosts
 *   - localVirtualHosts : Domains that are configured as virtual hosts on the same machine
 *   - usePipelining     : whether to use HTTP pipelining if possible (for all hosts)
 *   - maxConnsPerHost   : maximum number of concurrent connections (per host)
 *   - userAgent         : The User-Agent header value to send
 *   - logger            : a \Psr\Log\LoggerInterface instance for debug logging
 *   - caBundlePath      : path to specific Certificate Authority bundle (if any)
 *   - headers           : an array of default headers to send with every request
 *   - telemetry         : a TelemetryHeadersInterface instance whose request headers
 *                         are merged into every request
 * @throws InvalidArgumentException If 'caBundlePath' is given but no such file exists
 */
public function __construct( array $options ) {
	$bundle = $options['caBundlePath'] ?? null;
	if ( $bundle !== null ) {
		$this->caBundlePath = $bundle;
		if ( !file_exists( $bundle ) ) {
			throw new InvalidArgumentException( "Cannot find CA bundle: " . $bundle );
		}
	}

	// Each of these option names maps 1:1 onto a property of the same name
	$assignable = [
		'connTimeout', 'maxConnTimeout', 'reqTimeout', 'maxReqTimeout',
		'usePipelining', 'maxConnsPerHost', 'proxy', 'userAgent', 'logger',
		'localProxy', 'localVirtualHosts', 'headers', 'telemetry'
	];
	foreach ( $assignable as $name ) {
		if ( !isset( $options[$name] ) ) {
			continue;
		}
		$this->$name = $options[$name];
	}

	// Guarantee a usable logger so the rest of the class never has to null-check it
	if ( $this->logger === null ) {
		$this->logger = new NullLogger;
	}
}
141 | |
/**
 * Execute a single HTTP(S) request
 *
 * The request is wrapped in a one-element batch and handed to runMulti(); the
 * 'response' map of that one request is returned. The response map contains:
 *   - code    : HTTP response code or 0 if there was a serious error
 *   - reason  : HTTP response reason (empty if there was a serious error)
 *   - headers : <header name/value associative array>
 *   - body    : HTTP response body or resource (if "stream" was set)
 *   - error   : Any error string
 * The map also stores integer-indexed copies of these values. This lets callers do:
 * @code
 *     [ $rcode, $rdesc, $rhdrs, $rbody, $rerr ] = $http->run( $req );
 * @endcode
 * @param array $req HTTP request array
 * @param array $opts
 *   - connTimeout     : connection timeout per request (seconds)
 *   - reqTimeout      : post-connection timeout per request (seconds)
 *   - usePipelining   : whether to use HTTP pipelining if possible (for all hosts)
 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 *   - httpVersion     : One of 'v1.0', 'v1.1', 'v2' or 'v2.0'. Leave empty to use
 *                       PHP/curl's default
 * @return array Response array for request
 */
public function run( array $req, array $opts = [] ) {
	$batch = $this->runMulti( [ $req ], $opts );

	return $batch[0]['response'];
}
168 | |
/**
 * Execute a batch of HTTP(S) requests.
 *
 * When curl (including its multi interface) is available the batch is run
 * concurrently; otherwise the requests are run one after another.
 *
 * Each returned request map has its 'response' field set to a map of:
 *   - code    : HTTP response code or 0 if there was a serious error
 *   - reason  : HTTP response reason (empty if there was a serious error)
 *   - headers : <header name/value associative array>
 *   - body    : HTTP response body or resource (if "stream" was set)
 *   - error   : Any error string
 * The map also stores integer-indexed copies of these values. This lets callers do:
 * @code
 *     [ $rcode, $rdesc, $rhdrs, $rbody, $rerr ] = $req['response'];
 * @endcode
 * All headers in the 'headers' field are normalized to use lower case names.
 * This is true for the request headers and the response headers. Integer-indexed
 * method/URL entries will also be changed to use the corresponding string keys.
 *
 * @param array[] $reqs Map of HTTP request arrays
 * @param array $opts Options
 *   - connTimeout     : connection timeout per request (seconds)
 *   - reqTimeout      : post-connection timeout per request (seconds)
 *   - usePipelining   : whether to use HTTP pipelining if possible (for all hosts)
 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 *   - httpVersion     : One of 'v1.0', 'v1.1', 'v2' or 'v2.0'. Leave empty to use
 *                       PHP/curl's default
 * @return array[] $reqs With response array populated for each
 * @throws \Exception
 */
public function runMulti( array $reqs, array $opts = [] ) {
	$this->normalizeRequests( $reqs );

	// Caller-supplied timeouts win; the client-wide defaults fill in the gaps
	$opts = $opts + [ 'connTimeout' => $this->connTimeout, 'reqTimeout' => $this->reqTimeout ];

	// Clamp each timeout to its configured ceiling (a falsy ceiling means "no limit")
	$ceilings = [
		'connTimeout' => $this->maxConnTimeout,
		'reqTimeout' => $this->maxReqTimeout,
	];
	foreach ( $ceilings as $name => $ceiling ) {
		if ( $ceiling && $opts[$name] > $ceiling ) {
			$opts[$name] = $ceiling;
		}
	}

	if ( !$this->isCurlEnabled() ) {
		# TODO: Add handling for httpVersion option
		return $this->runMultiHttp( $reqs, $opts );
	}

	// Translate the symbolic version name into the matching curl constant
	$requested = $opts['httpVersion'] ?? null;
	if ( $requested == 'v1.0' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_1_0;
	} elseif ( $requested == 'v1.1' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_1_1;
	} elseif ( $requested == 'v2' || $requested == 'v2.0' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_2_0;
	} else {
		$opts['httpVersion'] = CURL_HTTP_VERSION_NONE;
	}

	return $this->runMultiCurl( $reqs, $opts );
}
232 | |
/**
 * Tell whether concurrent requests via the curl extension are possible
 *
 * @return bool true if curl and its multi interface are available, false otherwise.
 */
protected function isCurlEnabled() {
	if ( !extension_loaded( 'curl' ) ) {
		return false;
	}

	// Having the extension is not enough: some users' hosts provide a modified
	// curl with the multi-threaded parts removed(!), so probe for curl_multi* too
	return function_exists( 'curl_multi_init' );
}
243 | |
/**
 * Execute a set of HTTP(S) requests concurrently
 *
 * One cURL easy handle is created per request and attached to the shared multi
 * handle. Once the multi handle has no active transfers left, every easy handle is
 * detached and closed again and the outcome is recorded in the request's
 * 'response' map.
 *
 * If building a handle throws, all handles already attached for this batch are
 * detached and closed before the exception is re-thrown, so that the shared multi
 * handle never carries stale transfers over into a later batch.
 *
 * @see MultiHttpClient::runMulti()
 *
 * @param array[] $reqs Map of HTTP request arrays
 * @param array $opts
 *   - connTimeout     : connection timeout per request (seconds)
 *   - reqTimeout      : post-connection timeout per request (seconds)
 *   - usePipelining   : whether to use HTTP pipelining if possible
 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 *   - httpVersion:    : HTTP version to use
 * @phan-param array{connTimeout?:int,reqTimeout?:int,usePipelining?:bool,maxConnsPerHost?:int} $opts
 * @return array $reqs With response array populated for each
 * @throws \Exception
 * @suppress PhanTypeInvalidDimOffset
 */
private function runMultiCurl( array $reqs, array $opts ) {
	$chm = $this->getCurlMulti( $opts );

	$selectTimeout = $this->getSelectTimeout( $opts );

	// Add all of the required cURL handles...
	$handles = [];
	try {
		foreach ( $reqs as $index => &$req ) {
			$ch = $this->getCurlHandle( $req, $opts );
			$handles[$index] = $ch;
			curl_multi_add_handle( $chm, $ch );
		}
		unset( $req ); // don't assign over this by accident
	} catch ( \Throwable $e ) {
		unset( $req ); // don't assign over this by accident
		// The multi handle outlives this call, so detach whatever was attached so far
		foreach ( $handles as $ch ) {
			curl_multi_remove_handle( $chm, $ch );
			curl_close( $ch );
		}
		// Close any string wrapper file handles that were opened for this batch
		foreach ( $reqs as $pending ) {
			if ( isset( $pending['_closeHandle'] ) ) {
				fclose( $pending['_closeHandle'] );
			}
		}
		throw $e;
	}

	$infos = [];
	// Execute the cURL handles concurrently...
	$active = null; // handles still being processed
	do {
		// Do any available work...
		do {
			$mrc = curl_multi_exec( $chm, $active );
			// curl_multi_info_read() hands out one message per call, and several
			// transfers can complete within a single curl_multi_exec(); keep reading
			// until the queue is empty so that no completion status is lost.
			while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
				// Note: cast to integer even works on PHP 8.0+ despite the
				// handle being an object not a resource, because CurlHandle
				// has a backwards-compatible cast_object handler.
				$infos[(int)$info['handle']] = $info;
			}
		} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
		// Wait (if possible) for available work...
		if ( $active > 0 && $mrc == CURLM_OK && curl_multi_select( $chm, $selectTimeout ) == -1 ) {
			// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
			usleep( 5000 ); // 5ms
		}
	} while ( $active > 0 && $mrc == CURLM_OK );

	// Remove all of the added cURL handles and check for errors...
	foreach ( $reqs as $index => &$req ) {
		$ch = $handles[$index];
		curl_multi_remove_handle( $chm, $ch );

		if ( isset( $infos[(int)$ch] ) ) {
			$info = $infos[(int)$ch];
			$errno = $info['result'];
			if ( $errno !== 0 ) {
				$req['response']['error'] = "(curl error: $errno)";
				if ( function_exists( 'curl_strerror' ) ) {
					$req['response']['error'] .= " " . curl_strerror( $errno );
				}
				$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
					$req['response']['error'] );
			} else {
				$this->logger->debug(
					"HTTP complete: {method} {url} code={response_code} size={size} " .
					"total={total_time} connect={connect_time}",
					[
						'method' => $req['method'],
						'url' => $req['url'],
						'response_code' => $req['response']['code'],
						'size' => curl_getinfo( $ch, CURLINFO_SIZE_DOWNLOAD ),
						'total_time' => $this->getCurlTime(
							$ch, CURLINFO_TOTAL_TIME, 'CURLINFO_TOTAL_TIME_T'
						),
						'connect_time' => $this->getCurlTime(
							$ch, CURLINFO_CONNECT_TIME, 'CURLINFO_CONNECT_TIME_T'
						),
					]
				);
			}
		} else {
			// No completion message was ever delivered for this handle
			$req['response']['error'] = "(curl error: no status set)";
		}

		// For convenience with array destructuring
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
		curl_close( $ch );
		// Close any string wrapper file handles
		if ( isset( $req['_closeHandle'] ) ) {
			fclose( $req['_closeHandle'] );
			unset( $req['_closeHandle'] );
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
350 | |
/**
 * Create and configure a cURL easy handle for one request
 *
 * The header and write callbacks installed on the handle capture $req by reference
 * and fill in $req['response'] (code, reason, headers, body) while the transfer
 * runs. For PUT requests with a non-empty string body, a php://temp stream is opened
 * and stored in $req['_closeHandle']; the caller must close it after the transfer.
 *
 * A PUT whose body is a resource needs either a 'content-length' header or a
 * 'transfer-encoding' header of 'chunked'. The misspelt value 'chunks', which used
 * to be the only value recognised here, is still accepted for backward compatibility.
 *
 * @param array &$req HTTP request map
 * @phpcs:ignore Generic.Files.LineLength
 * @phan-param array{url:string,proxy?:?string,query:mixed,method:string,body:string|resource,headers:array<string,string>,stream?:resource,flags:array} $req
 * @param array $opts
 *   - connTimeout : default connection timeout
 *   - reqTimeout : default request timeout
 *   - httpVersion: default HTTP version
 * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintReturn
 * @return resource|object
 * @throws InvalidArgumentException If a resource PUT body has neither a length nor a
 *   chunked transfer encoding, if a body is given for a method other than PUT/POST,
 *   or if a header name contains a colon
 */
protected function getCurlHandle( array &$req, array $opts ) {
	$ch = curl_init();

	curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy );
	// Timeouts are configured in seconds but handed to curl in milliseconds
	curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, intval( $opts['connTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_TIMEOUT_MS, intval( $opts['reqTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
	curl_setopt( $ch, CURLOPT_HEADER, 0 );
	if ( $this->caBundlePath !== null ) {
		curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
		curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
	}
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

	// Append the 'query' map to the URL, respecting any query string already there
	$url = $req['url'];
	$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
	if ( $query != '' ) {
		$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
	}
	curl_setopt( $ch, CURLOPT_URL, $url );
	curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
	curl_setopt( $ch, CURLOPT_NOBODY, ( $req['method'] === 'HEAD' ) );
	curl_setopt( $ch, CURLOPT_HTTP_VERSION, $opts['httpVersion'] ?? CURL_HTTP_VERSION_NONE );

	if ( $req['method'] === 'PUT' ) {
		curl_setopt( $ch, CURLOPT_PUT, 1 );
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) ) {
			curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
			if ( isset( $req['headers']['content-length'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
			} else {
				// The HTTP transfer coding token is "chunked" (compared case-insensitively);
				// "chunks" is tolerated because it was the value this check originally used.
				$transferEncoding = strtolower(
					trim( (string)( $req['headers']['transfer-encoding'] ?? '' ) )
				);
				if ( $transferEncoding === 'chunked' || $transferEncoding === 'chunks' ) {
					curl_setopt( $ch, CURLOPT_UPLOAD, true );
				} else {
					throw new InvalidArgumentException( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
				}
			}
		} elseif ( $req['body'] !== '' ) {
			// Wrap the string body in a temporary stream so it can be uploaded like a file
			$fp = fopen( "php://temp", "wb+" );
			fwrite( $fp, $req['body'], strlen( $req['body'] ) );
			rewind( $fp );
			curl_setopt( $ch, CURLOPT_INFILE, $fp );
			curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
			$req['_closeHandle'] = $fp; // remember to close this later
		} else {
			curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
		}
		curl_setopt( $ch, CURLOPT_READFUNCTION,
			static function ( $ch, $fd, $length ) {
				return (string)fread( $fd, $length );
			}
		);
	} elseif ( $req['method'] === 'POST' ) {
		curl_setopt( $ch, CURLOPT_POST, 1 );
		curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
	} else {
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
			throw new InvalidArgumentException( "HTTP body specified for a non PUT/POST request." );
		}
		$req['headers']['content-length'] = 0;
	}

	if ( !isset( $req['headers']['user-agent'] ) ) {
		$req['headers']['user-agent'] = $this->userAgent;
	}

	// Flatten the header map into the "name: value" lines curl expects
	$headers = [];
	foreach ( $req['headers'] as $name => $value ) {
		if ( strpos( $name, ':' ) !== false ) {
			throw new InvalidArgumentException( "Header name must not contain colon-space." );
		}
		$headers[] = $name . ': ' . trim( $value );
	}
	curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

	curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
		static function ( $ch, $header ) use ( &$req ) {
			if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) {
				header( $header );
			}
			$length = strlen( $header );
			$matches = [];
			if ( preg_match( "/^(HTTP\/(?:1\.[01]|2)) (\d{3}) (.*)/", $header, $matches ) ) {
				$req['response']['code'] = (int)$matches[2];
				$req['response']['reason'] = trim( $matches[3] );
				// After a redirect we will receive this again, but we already stored headers
				// that belonged to a redirect response. Start over.
				$req['response']['headers'] = [];
				return $length;
			}
			if ( strpos( $header, ":" ) === false ) {
				return $length;
			}
			[ $name, $value ] = explode( ":", $header, 2 );
			$name = strtolower( $name );
			$value = trim( $value );
			// Repeated response headers are folded into one comma-separated value
			if ( isset( $req['response']['headers'][$name] ) ) {
				$req['response']['headers'][$name] .= ', ' . $value;
			} else {
				$req['response']['headers'][$name] = $value;
			}
			return $length;
		}
	);

	// This works with both file and php://temp handles (unlike CURLOPT_FILE)
	$hasOutputStream = isset( $req['stream'] );
	curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
		static function ( $ch, $data ) use ( &$req, $hasOutputStream ) {
			if ( $hasOutputStream ) {
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				return fwrite( $req['stream'], $data );
			} else {
				// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$req['response']['body'] .= $data;

				return strlen( $data );
			}
		}
	);

	return $ch;
}
489 | |
/**
 * Get the shared cURL multi handle, creating it on first use, and apply the
 * per-call connection and pipelining options to it
 *
 * @param array $opts May contain 'maxConnsPerHost' and 'usePipelining'; the
 *   client-wide settings are used for whichever is absent
 * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintReturn
 * @return resource|object
 * @throws \Exception
 */
protected function getCurlMulti( array $opts ) {
	if ( !$this->cmh ) {
		$multiHandle = curl_multi_init();
		// Limit the size of the idle connection cache such that consecutive parallel
		// request batches to the same host can avoid having to keep making connections
		curl_multi_setopt( $multiHandle, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
		$this->cmh = $multiHandle;
	}

	$installedCurl = curl_version()['version'];

	// CURLMOPT_MAX_HOST_CONNECTIONS is available since PHP 7.0.7 and cURL 7.30.0
	$supportsHostLimit = version_compare( $installedCurl, '7.30.0', '>=' );
	if ( $supportsHostLimit ) {
		// Limit the number of in-flight requests for any given host
		$perHostLimit = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;
		curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$perHostLimit );
	}

	$wantsPipelining = $opts['usePipelining'] ?? $this->usePipelining;
	if ( $wantsPipelining ) {
		if ( version_compare( $installedCurl, '7.62', '>=' ) ) {
			// The option is a bitfield but HTTP/1.x pipelining has been removed
			$mode = CURLPIPE_MULTIPLEX;
		} elseif ( version_compare( $installedCurl, '7.43', '>=' ) ) {
			// The option is a bitfield and HTTP/1.x pipelining is supported
			$mode = CURLPIPE_HTTP1 | CURLPIPE_MULTIPLEX;
		} else {
			// The option is a boolean
			$mode = 1;
		}
		// Suppress deprecation, we know already (T264735)
		// phpcs:ignore Generic.PHP.NoSilencedErrors
		@curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, $mode );
	}

	return $this->cmh;
}
532 | |
/**
 * Read a timing value from a cURL handle as a string of seconds
 *
 * When the constant named by $newConstName is defined, its value is read, divided
 * by 1e6 and formatted with six decimal places. Otherwise the value of $oldOption
 * is returned as-is, cast to string.
 *
 * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintParam
 * @param resource|object $ch
 * @param int $oldOption curl_getinfo() option to fall back to
 * @param string $newConstName Name of the preferred curl_getinfo() option constant
 * @return string
 */
private function getCurlTime( $ch, $oldOption, $newConstName ): string {
	if ( !defined( $newConstName ) ) {
		return (string)curl_getinfo( $ch, $oldOption );
	}

	$rawValue = curl_getinfo( $ch, constant( $newConstName ) );

	return sprintf( "%.6F", $rawValue / 1e6 );
}
550 | |
/**
 * Execute a set of HTTP(S) requests one after another.
 *
 * Each request is performed through a request object obtained from the MediaWiki
 * HttpRequestFactory service, and its outcome is written to the request's
 * 'response' map (including the integer-indexed copies).
 *
 * @see MultiHttpClient::runMulti()
 * @todo Remove dependency on MediaWikiServices: rewrite using Guzzle T202352
 * @param array $reqs Map of HTTP request arrays
 * @phpcs:ignore Generic.Files.LineLength
 * @phan-param array<int,array{url:string,query:array,method:string,body:string,headers:array<string,string>,proxy?:?string}> $reqs
 * @param array $opts
 *   - connTimeout : connection timeout per request (seconds)
 *   - reqTimeout  : post-connection timeout per request (seconds)
 * @phan-param array{connTimeout:int,reqTimeout:int} $opts
 * @return array $reqs With response array populated for each
 * @throws \Exception
 */
private function runMultiHttp( array $reqs, array $opts = [] ) {
	// Options that are identical for every request in the batch
	$sharedOptions = [
		'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
		'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout,
		'logger' => $this->logger,
		'caInfo' => $this->caBundlePath,
	];

	foreach ( $reqs as &$req ) {
		$requestOptions = $sharedOptions;
		$requestOptions['method'] = $req['method'];
		$requestOptions['proxy'] = $req['proxy'] ?? $this->proxy;
		$requestOptions['userAgent'] = $req['headers']['user-agent'] ?? $this->userAgent;
		$requestOptions['postData'] = $req['body'];

		// Append the 'query' map to the URL, respecting any query string already there
		$target = $req['url'];
		$queryString = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $queryString != '' ) {
			$separator = strpos( $req['url'], '?' ) === false ? '?' : '&';
			$target .= $separator . $queryString;
		}

		$httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
			$target, $requestOptions, __METHOD__ );
		$httpRequest->setLogger( $this->logger );
		foreach ( $req['headers'] as $headerName => $headerValue ) {
			$httpRequest->setHeader( $headerName, $headerValue );
		}
		$status = $httpRequest->execute()->getStatusValue();

		// Each response header arrives as a list of values; join them into one string
		$flatHeaders = [];
		foreach ( $httpRequest->getResponseHeaders() as $headerName => $values ) {
			$flatHeaders[$headerName] = implode( ', ', $values );
		}

		$req['response'] = [
			'code' => $httpRequest->getStatus(),
			'reason' => '',
			'headers' => $flatHeaders,
			'body' => $httpRequest->getContent(),
			'error' => '',
		];

		if ( !$status->isOK() ) {
			$firstError = $status->getErrors()[0] ?? null;
			if ( $firstError !== null ) {
				$req['response']['error'] = $firstError['message'];

				// param values vary per failure type (ex. unknown host vs unknown page)
				if ( isset( $firstError['params'][0] ) ) {
					$leadParam = $firstError['params'][0];
					if ( !is_numeric( $leadParam ) ) {
						$req['response']['reason'] = $leadParam;
					} elseif ( isset( $firstError['params'][1] ) ) {
						// @phan-suppress-next-line PhanTypeInvalidDimOffset
						$req['response']['reason'] = $firstError['params'][1];
					}
				}
			}
		}

		// Integer-indexed copies, for convenience with array destructuring
		foreach ( [ 'code', 'reason', 'headers', 'body', 'error' ] as $position => $field ) {
			$req['response'][$position] = $req['response'][$field];
		}
	}

	return $reqs;
}
637 | |
/**
 * Lower-case every header name in a header map
 *
 * When two names differ only by case, the value that appears later in the input
 * is the one kept.
 *
 * @param array $headers Header name/value map
 * @return array The same values keyed by lower-cased names
 */
private function normalizeHeaders( array $headers ): array {
	$lowerCasedNames = array_map( 'strtolower', array_keys( $headers ) );

	return array_combine( $lowerCasedNames, array_values( $headers ) );
}
650 | |
/**
 * Bring every request map into canonical form, in place
 *
 * For each request this resets 'response' to an empty result, converts the
 * short-form integer keys 0/1 into 'method'/'url', reroutes local URLs through the
 * configured local reverse proxy, fills in 'query', 'headers', 'body' and 'flags',
 * and logs the start of the request with sensitive header values redacted.
 *
 * @param array[] &$reqs the requests to normalize
 * @throws InvalidArgumentException If a request has no method or no URL
 */
private function normalizeRequests( array &$reqs ) {
	$blankResponse = [
		'code' => 0,
		'reason' => '',
		'headers' => [],
		'body' => '',
		'error' => ''
	];

	foreach ( $reqs as &$req ) {
		$req['response'] = $blankResponse;

		// short-form: index 0 is the method, index 1 is the URL
		foreach ( [ 'method', 'url' ] as $position => $field ) {
			if ( isset( $req[$position] ) ) {
				$req[$field] = $req[$position];
				unset( $req[$position] );
			}
		}

		if ( !isset( $req['method'] ) ) {
			throw new InvalidArgumentException( "Request has no 'method' field set." );
		}
		if ( !isset( $req['url'] ) ) {
			throw new InvalidArgumentException( "Request has no 'url' field set." );
		}

		if ( $this->localProxy !== false && $this->isLocalURL( $req['url'] ) ) {
			$this->useReverseProxy( $req, $this->localProxy );
		}

		if ( !isset( $req['query'] ) ) {
			$req['query'] = [];
		}

		// Later sources win: client defaults, then telemetry, then the request's own headers
		$telemetryHeaders = $this->telemetry !== null ? $this->telemetry->getRequestHeaders() : [];
		$req['headers'] = $this->normalizeHeaders(
			array_merge( $this->headers, $telemetryHeaders, $req['headers'] ?? [] )
		);

		if ( !isset( $req['body'] ) ) {
			$req['body'] = '';
			$req['headers']['content-length'] = 0;
		}

		// Redact some headers we know to have tokens before logging them
		$loggableHeaders = [];
		foreach ( $req['headers'] as $name => $value ) {
			$isSensitive = preg_match( self::SENSITIVE_HEADERS, $name ) === 1;
			$loggableHeaders[$name] = $isSensitive ? '[redacted]' : $value;
		}
		$this->logger->debug( "HTTP start: {method} {url}",
			[
				'method' => $req['method'],
				'url' => $req['url'],
				'headers' => $loggableHeaders,
			]
		);

		if ( !isset( $req['flags'] ) ) {
			$req['flags'] = [];
		}
	}
}
711 | |
/**
 * Redirect a request to a reverse proxy, in place
 *
 * The host of the original URL is moved into the request's 'Host' header, and the
 * scheme, host and port of the URL are replaced by those of the proxy URL. The
 * request's 'proxy' field is set to false.
 *
 * @param array &$req HTTP request map; 'url', 'headers' and 'proxy' are modified
 * @param string $proxy URL of the reverse proxy
 * @throws InvalidArgumentException If either URL cannot be parsed
 */
private function useReverseProxy( array &$req, $proxy ) {
	$proxyParts = parse_url( $proxy );
	if ( $proxyParts === false ) {
		throw new InvalidArgumentException( "Invalid reverseProxy configured: $proxy" );
	}

	$targetParts = parse_url( $req['url'] );
	if ( $targetParts === false ) {
		throw new InvalidArgumentException( "Invalid url specified: {$req['url']}" );
	}

	// Set the current host in the Host header
	// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
	$req['headers']['Host'] = $targetParts['host'];

	// Replace scheme, host and port in the request
	// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
	$targetParts['scheme'] = $proxyParts['scheme'];
	// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
	$targetParts['host'] = $proxyParts['host'];
	// The original port never survives; it is either replaced or dropped
	unset( $targetParts['port'] );
	if ( isset( $proxyParts['port'] ) ) {
		$targetParts['port'] = $proxyParts['port'];
	}
	$req['url'] = self::assembleUrl( $targetParts );

	// Explicitly disable use of another proxy by setting to false,
	// since null will fallback to $this->proxy
	$req['proxy'] = false;
}
739 | |
/**
 * Build a URL string from the components returned by parse_url().
 *
 * This is derived from MediaWiki\Utils\UrlUtils::assemble but changed to work
 * with parse_url's result, so the "://" delimiter is hardcoded.
 *
 * Layout of the result:
 * [scheme://][[user][:pass]@][host][:port][path][?query][#fragment]
 *
 * User, password and port are only emitted when a host is present, and an
 * empty query string is omitted entirely.
 *
 * @param array $urlParts URL parts, as output from parse_url()
 * @return string URL assembled from its component parts
 */
private static function assembleUrl( array $urlParts ): string {
	$url = '';
	if ( isset( $urlParts['scheme'] ) ) {
		$url = $urlParts['scheme'] . '://';
	}

	if ( isset( $urlParts['host'] ) ) {
		// Credentials and port only make sense as part of an authority
		$credentials = '';
		if ( isset( $urlParts['user'] ) ) {
			$password = isset( $urlParts['pass'] ) ? ':' . $urlParts['pass'] : '';
			$credentials = $urlParts['user'] . $password . '@';
		}
		$port = isset( $urlParts['port'] ) ? ':' . $urlParts['port'] : '';

		$url .= $credentials . $urlParts['host'] . $port;
	}

	$url .= $urlParts['path'] ?? '';

	$query = $urlParts['query'] ?? '';
	if ( $query !== '' ) {
		$url .= '?' . $query;
	}

	if ( isset( $urlParts['fragment'] ) ) {
		$url .= '#' . $urlParts['fragment'];
	}

	return $url;
}
783 | |
/**
 * Check if the URL can be served by localhost
 *
 * The host of an http(s) URL is considered local when the host itself, or
 * any of its parent domains, is listed in $this->localVirtualHosts.
 *
 * @note this is mostly a copy of MWHttpRequest::isLocalURL()
 * @param string $url Full url to check
 * @return bool
 */
private function isLocalURL( $url ) {
	if ( !$this->localVirtualHosts ) {
		// Shortcut
		return false;
	}

	// Extract host part
	$matches = [];
	if ( !preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
		return false;
	}

	// Walk the host from its last label leftwards, so that every superdomain
	// (e.g. "org", "example.org", "www.example.org") is checked in turn
	$domain = '';
	foreach ( array_reverse( explode( '.', $matches[1] ) ) as $i => $domainPart ) {
		$domain = $i === 0 ? $domainPart : $domainPart . '.' . $domain;

		// Strict comparison: a loose in_array() compares numeric-looking
		// strings by value, so e.g. "1e1" would match a configured "10"
		if ( in_array( $domain, $this->localVirtualHosts, true ) ) {
			return true;
		}
	}

	return false;
}
824 | |
/**
 * Work out how long a select call may block for the given options.
 *
 * The connection and request timeouts are taken from $opts, falling back to
 * the values stored on this object. The smaller of the non-empty ones is
 * scaled by TIMEOUT_ACCURACY_FACTOR, with a lower bound of 10us. If neither
 * timeout is set, 1 is returned.
 *
 * @param array $opts Options; 'connTimeout' and 'reqTimeout' are read
 * @return float
 */
private function getSelectTimeout( $opts ) {
	// array_filter() without a callback drops empty values such as 0 and null
	$candidates = array_filter( [
		$opts['connTimeout'] ?? $this->connTimeout,
		$opts['reqTimeout'] ?? $this->reqTimeout,
	] );
	if ( !$candidates ) {
		return 1;
	}

	$scaled = min( $candidates ) * self::TIMEOUT_ACCURACY_FACTOR;

	// Minimum 10us
	return $scaled < 10e-6 ? 10e-6 : $scaled;
}
846 | |
/**
 * Replace the logger used by this client.
 *
 * @param LoggerInterface $logger Logger to store on this instance
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
855 | |
/**
 * Close the curl multi handle held in $this->cmh, if there is one, and
 * clear the property.
 */
public function __destruct() {
	if ( !$this->cmh ) {
		// No multi handle is held, nothing to release
		return;
	}
	curl_multi_close( $this->cmh );
	$this->cmh = null;
}
862 | |
863 | } |
// Register the un-namespaced name 'MultiHttpClient' as an alias of this class.
// NOTE(review): presumably kept so that callers still using the old global
// class name keep working — confirm before removing.
/** @deprecated class alias since 1.43 */
class_alias( MultiHttpClient::class, 'MultiHttpClient' );