Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
56.89% |
194 / 341 |
|
18.75% |
3 / 16 |
CRAP | |
0.00% |
0 / 1 |
MultiHttpClient | |
56.89% |
194 / 341 |
|
18.75% |
3 / 16 |
918.21 | |
0.00% |
0 / 1 |
__construct | |
76.92% |
10 / 13 |
|
0.00% |
0 / 1 |
5.31 | |||
run | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
runMulti | |
38.10% |
8 / 21 |
|
0.00% |
0 / 1 |
39.70 | |||
isCurlEnabled | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
runMultiCurl | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
156 | |||
getCurlHandle | |
43.18% |
38 / 88 |
|
0.00% |
0 / 1 |
110.78 | |||
getCurlMulti | |
87.50% |
14 / 16 |
|
0.00% |
0 / 1 |
6.07 | |||
getCurlTime | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
runMultiHttp | |
98.00% |
49 / 50 |
|
0.00% |
0 / 1 |
10 | |||
normalizeHeaders | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
normalizeRequests | |
93.02% |
40 / 43 |
|
0.00% |
0 / 1 |
12.05 | |||
useReverseProxy | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
4.16 | |||
isLocalURL | |
94.12% |
16 / 17 |
|
0.00% |
0 / 1 |
6.01 | |||
getSelectTimeout | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__destruct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * HTTP service client |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | */ |
22 | |
23 | use MediaWiki\MediaWikiServices; |
24 | use Psr\Log\LoggerAwareInterface; |
25 | use Psr\Log\LoggerInterface; |
26 | use Psr\Log\NullLogger; |
27 | use Wikimedia\Http\TelemetryHeadersInterface; |
28 | |
29 | /** |
30 | * Class to handle multiple HTTP requests |
31 | * |
32 | * If curl is available, requests will be made concurrently. |
33 | * Otherwise, they will be made serially. |
34 | * |
35 | * HTTP request maps are arrays that use the following format: |
36 | * - method : GET/HEAD/PUT/POST/DELETE |
37 | * - url : HTTP/HTTPS URL |
38 | * - query : <query parameter field/value associative array> (uses RFC 3986) |
39 | * - headers : <header name/value associative array> |
40 | * - body : source to get the HTTP request body from; |
41 | * this can simply be a string (always), a resource for |
42 | * PUT requests, and a field/value array for POST request; |
43 | * array bodies are encoded as multipart/form-data and strings |
44 | * use application/x-www-form-urlencoded (headers sent automatically) |
45 | * - stream : resource to stream the HTTP response body to |
46 | * - proxy : HTTP proxy to use |
47 | * - flags : map of boolean flags which supports: |
48 | * - relayResponseHeaders : write out header via header() |
49 | * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'. |
50 | * |
51 | * Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get |
52 | * a client object with appropriately configured timeouts. |
53 | * |
54 | * @since 1.23 |
55 | */ |
56 | class MultiHttpClient implements LoggerAwareInterface { |
57 | /** Regex for headers likely to contain tokens, etc. that we want to redact from logs */ |
58 | private const SENSITIVE_HEADERS = '/(^|-|_)(authorization|auth|password|cookie)($|-|_)/'; |
59 | /** |
60 | * @phpcs:ignore MediaWiki.Commenting.PropertyDocumentation.ObjectTypeHintVar |
61 | * @var resource|object|null curl_multi_init() handle, initialized in getCurlMulti() |
62 | */ |
63 | protected $cmh = null; |
64 | /** @var string|null SSL certificates path */ |
65 | protected $caBundlePath; |
66 | /** @var float */ |
67 | protected $connTimeout = 10; |
68 | /** @var float */ |
69 | protected $maxConnTimeout = INF; |
70 | /** @var float */ |
71 | protected $reqTimeout = 30; |
72 | /** @var float */ |
73 | protected $maxReqTimeout = INF; |
74 | /** @var bool */ |
75 | protected $usePipelining = false; |
76 | /** @var int */ |
77 | protected $maxConnsPerHost = 50; |
78 | /** @var string|null proxy */ |
79 | protected $proxy; |
80 | /** @var string|false */ |
81 | protected $localProxy = false; |
82 | /** @var string[] */ |
83 | protected $localVirtualHosts = []; |
84 | /** @var string */ |
85 | protected $userAgent = 'wikimedia/multi-http-client v1.1'; |
86 | /** @var LoggerInterface */ |
87 | protected $logger; |
88 | /** @var array */ |
89 | protected array $headers = []; |
90 | |
91 | // In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect |
92 | // timeouts are periodically polled instead of being accurately respected. |
93 | // The select timeout is set to the minimum timeout multiplied by this factor. |
94 | private const TIMEOUT_ACCURACY_FACTOR = 0.1; |
95 | |
96 | private ?TelemetryHeadersInterface $telemetry = null; |
97 | |
/**
 * Build a client from a map of options.
 *
 * Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get
 * a client object with appropriately configured timeouts instead of constructing
 * a MultiHttpClient directly.
 *
 * Options that are absent (or null) leave the corresponding property at its default.
 *
 * @param array $options
 *   - connTimeout       : default connection timeout (seconds)
 *   - reqTimeout        : default request timeout (seconds)
 *   - maxConnTimeout    : maximum connection timeout (seconds)
 *   - maxReqTimeout     : maximum request timeout (seconds)
 *   - proxy             : HTTP proxy to use
 *   - localProxy        : Reverse proxy to use for domains in localVirtualHosts
 *   - localVirtualHosts : Domains that are configured as virtual hosts on the same machine
 *   - usePipelining     : whether to use HTTP pipelining if possible (for all hosts)
 *   - maxConnsPerHost   : maximum number of concurrent connections (per host)
 *   - userAgent         : The User-Agent header value to send
 *   - logger            : a \Psr\Log\LoggerInterface instance for debug logging
 *   - caBundlePath      : path to specific Certificate Authority bundle (if any)
 *   - headers           : an array of default headers to send with every request
 *   - telemetry         : a \Wikimedia\Http\TelemetryHeadersInterface instance that
 *                         supplies extra request headers
 * @throws InvalidArgumentException If caBundlePath is given but does not exist
 */
public function __construct( array $options ) {
	$caBundle = $options['caBundlePath'] ?? null;
	if ( $caBundle !== null ) {
		$this->caBundlePath = $caBundle;
		if ( !file_exists( $caBundle ) ) {
			throw new InvalidArgumentException( "Cannot find CA bundle: " . $caBundle );
		}
	}

	// Options that map one-to-one onto a property of the same name
	$propertyOptions = [
		'connTimeout', 'maxConnTimeout', 'reqTimeout', 'maxReqTimeout',
		'usePipelining', 'maxConnsPerHost', 'proxy', 'userAgent', 'logger',
		'localProxy', 'localVirtualHosts', 'headers', 'telemetry'
	];
	foreach ( $propertyOptions as $name ) {
		$value = $options[$name] ?? null;
		if ( $value === null ) {
			continue;
		}
		$this->$name = $value;
	}

	// Always have a logger so the rest of the class can log unconditionally
	if ( $this->logger === null ) {
		$this->logger = new NullLogger;
	}
}
139 | |
/**
 * Execute a single HTTP(S) request
 *
 * This is a thin wrapper around runMulti() for the one-request case.
 *
 * The returned response map contains:
 *   - code    : HTTP response code or 0 if there was a serious error
 *   - reason  : HTTP response reason (empty if there was a serious error)
 *   - headers : <header name/value associative array>
 *   - body    : HTTP response body or resource (if "stream" was set)
 *   - error   : Any error string
 * The map also stores integer-indexed copies of these values. This lets callers do:
 * @code
 *     [ $rcode, $rdesc, $rhdrs, $rbody, $rerr ] = $http->run( $req );
 * @endcode
 * @param array $req HTTP request array
 * @param array $opts
 *   - connTimeout     : connection timeout per request (seconds)
 *   - reqTimeout      : post-connection timeout per request (seconds)
 *   - usePipelining   : whether to use HTTP pipelining if possible (for all hosts)
 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 *   - httpVersion     : One of 'v1.0', 'v1.1', 'v2' or 'v2.0'. Leave empty to use
 *                       PHP/curl's default
 * @return array Response array for request
 */
public function run( array $req, array $opts = [] ) {
	$completed = $this->runMulti( [ $req ], $opts );

	return $completed[0]['response'];
}
166 | |
/**
 * Execute a batch of HTTP(S) requests.
 *
 * If curl is available, requests will be made concurrently.
 * Otherwise, they will be made serially.
 *
 * Each request map is returned with its 'response' field set to a map of:
 *   - code    : HTTP response code or 0 if there was a serious error
 *   - reason  : HTTP response reason (empty if there was a serious error)
 *   - headers : <header name/value associative array>
 *   - body    : HTTP response body or resource (if "stream" was set)
 *   - error   : Any error string
 * The map also stores integer-indexed copies of these values. This lets callers do:
 * @code
 *        [ $rcode, $rdesc, $rhdrs, $rbody, $rerr ] = $req['response'];
 * @endcode
 * All headers in the 'headers' field are normalized to use lower case names.
 * This is true for the request headers and the response headers. Integer-indexed
 * method/URL entries will also be changed to use the corresponding string keys.
 *
 * @param array[] $reqs Map of HTTP request arrays
 * @param array $opts Options
 *   - connTimeout     : connection timeout per request (seconds)
 *   - reqTimeout      : post-connection timeout per request (seconds)
 *   - usePipelining   : whether to use HTTP pipelining if possible (for all hosts)
 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 *   - httpVersion     : One of 'v1.0', 'v1.1', 'v2' or 'v2.0'. Leave empty to use
 *                       PHP/curl's default
 * @return array[] $reqs With response array populated for each
 * @throws Exception
 */
public function runMulti( array $reqs, array $opts = [] ) {
	$this->normalizeRequests( $reqs );

	// Caller-supplied timeouts win; otherwise fall back to the instance defaults
	$opts += [ 'connTimeout' => $this->connTimeout, 'reqTimeout' => $this->reqTimeout ];

	// Clamp each timeout to its configured ceiling (a falsy ceiling means "no limit")
	$ceilings = [
		'connTimeout' => $this->maxConnTimeout,
		'reqTimeout' => $this->maxReqTimeout,
	];
	foreach ( $ceilings as $name => $ceiling ) {
		if ( $ceiling && $opts[$name] > $ceiling ) {
			$opts[$name] = $ceiling;
		}
	}

	if ( !$this->isCurlEnabled() ) {
		# TODO: Add handling for httpVersion option
		return $this->runMultiHttp( $reqs, $opts );
	}

	// Translate the symbolic version name into the matching curl constant
	$requestedVersion = $opts['httpVersion'] ?? null;
	if ( $requestedVersion == 'v1.0' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_1_0;
	} elseif ( $requestedVersion == 'v1.1' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_1_1;
	} elseif ( $requestedVersion == 'v2' || $requestedVersion == 'v2.0' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_2_0;
	} else {
		$opts['httpVersion'] = CURL_HTTP_VERSION_NONE;
	}

	return $this->runMultiCurl( $reqs, $opts );
}
230 | |
/**
 * Check whether concurrent requests via the curl extension are possible
 *
 * @return bool true if the curl extension and its multi API are available, false otherwise.
 */
protected function isCurlEnabled() {
	if ( !extension_loaded( 'curl' ) ) {
		return false;
	}

	// The extension alone is not enough: some users' hosts provide them with a
	// modified curl with the multi-threaded parts removed(!), so probe for it explicitly
	return function_exists( 'curl_multi_init' );
}
241 | |
/**
 * Execute a set of HTTP(S) requests concurrently using the curl multi API
 *
 * One curl handle is created per request and attached to the shared multi handle.
 * The transfers are then driven until none is active, after which each handle is
 * detached, its outcome is recorded in the request's 'response' map, and it is closed.
 *
 * @see MultiHttpClient::runMulti()
 *
 * @param array[] $reqs Map of HTTP request arrays
 * @param array $opts
 *   - connTimeout     : connection timeout per request (seconds)
 *   - reqTimeout      : post-connection timeout per request (seconds)
 *   - usePipelining   : whether to use HTTP pipelining if possible
 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 *   - httpVersion:    : HTTP version to use
 * @phan-param array{connTimeout?:int,reqTimeout?:int,usePipelining?:bool,maxConnsPerHost?:int} $opts
 * @return array $reqs With response array populated for each
 * @throws Exception
 * @suppress PhanTypeInvalidDimOffset
 */
private function runMultiCurl( array $reqs, array $opts ) {
	$chm = $this->getCurlMulti( $opts );

	$selectTimeout = $this->getSelectTimeout( $opts );

	// Add all of the required cURL handles...
	$handles = [];
	foreach ( $reqs as $index => &$req ) {
		$handles[$index] = $this->getCurlHandle( $req, $opts );
		curl_multi_add_handle( $chm, $handles[$index] );
	}
	unset( $req ); // don't assign over this by accident

	$infos = [];
	// Execute the cURL handles concurrently...
	$active = null; // handles still being processed
	do {
		// Do any available work...
		do {
			$mrc = curl_multi_exec( $chm, $active );
			// curl_multi_info_read() returns a single message per call, while one
			// curl_multi_exec() call may complete several transfers. Drain the whole
			// queue; otherwise transfers that finished together with another one
			// would never get their status recorded and be reported as failed.
			while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
				// Note: cast to integer even works on PHP 8.0+ despite the
				// handle being an object not a resource, because CurlHandle
				// has a backwards-compatible cast_object handler.
				$infos[(int)$info['handle']] = $info;
			}
		} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
		// Wait (if possible) for available work...
		if ( $active > 0 && $mrc == CURLM_OK && curl_multi_select( $chm, $selectTimeout ) == -1 ) {
			// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
			usleep( 5000 ); // 5ms
		}
	} while ( $active > 0 && $mrc == CURLM_OK );

	// Remove all of the added cURL handles and check for errors...
	foreach ( $reqs as $index => &$req ) {
		$ch = $handles[$index];
		curl_multi_remove_handle( $chm, $ch );

		if ( isset( $infos[(int)$ch] ) ) {
			$info = $infos[(int)$ch];
			$errno = $info['result'];
			if ( $errno !== 0 ) {
				$req['response']['error'] = "(curl error: $errno)";
				if ( function_exists( 'curl_strerror' ) ) {
					$req['response']['error'] .= " " . curl_strerror( $errno );
				}
				$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
					$req['response']['error'] );
			} else {
				$this->logger->debug(
					"HTTP complete: {method} {url} code={response_code} size={size} " .
					"total={total_time} connect={connect_time}",
					[
						'method' => $req['method'],
						'url' => $req['url'],
						'response_code' => $req['response']['code'],
						'size' => curl_getinfo( $ch, CURLINFO_SIZE_DOWNLOAD ),
						'total_time' => $this->getCurlTime(
							$ch, CURLINFO_TOTAL_TIME, 'CURLINFO_TOTAL_TIME_T'
						),
						'connect_time' => $this->getCurlTime(
							$ch, CURLINFO_CONNECT_TIME, 'CURLINFO_CONNECT_TIME_T'
						),
					]
				);
			}
		} else {
			// No completion message was ever seen for this handle
			$req['response']['error'] = "(curl error: no status set)";
		}

		// For convenience with array destructuring
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
		curl_close( $ch );
		// Close any string wrapper file handles
		if ( isset( $req['_closeHandle'] ) ) {
			fclose( $req['_closeHandle'] );
			unset( $req['_closeHandle'] );
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
348 | |
/**
 * Create and configure a curl handle for a single normalized request
 *
 * The handle's header and write callbacks store the status line, the response headers
 * and the response body directly into $req['response'] (the body goes to $req['stream']
 * instead when that is set), which is why $req is taken by reference.
 *
 * For PUT requests with a non-empty string body, a php://temp handle holding the body is
 * stored in $req['_closeHandle']; the caller is responsible for closing it.
 *
 * @param array &$req HTTP request map
 * @phpcs:ignore Generic.Files.LineLength
 * @phan-param array{url:string,proxy?:?string,query:mixed,method:string,body:string|resource,headers:array<string,string>,stream?:resource,flags:array} $req
 * @param array $opts
 *   - connTimeout : connection timeout (seconds)
 *   - reqTimeout : request timeout (seconds)
 *   - httpVersion: curl HTTP version constant to use
 * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintReturn
 * @return resource|object
 * @throws InvalidArgumentException If a streamed PUT body has neither a content-length
 *   nor a chunked transfer-encoding header, if a body is given for a non PUT/POST
 *   request, or if a header name contains a colon
 */
protected function getCurlHandle( array &$req, array $opts ) {
	$ch = curl_init();

	curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy );
	// The timeouts are given in seconds; curl wants integer milliseconds here
	curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, intval( $opts['connTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_TIMEOUT_MS, intval( $opts['reqTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
	curl_setopt( $ch, CURLOPT_HEADER, 0 );
	if ( $this->caBundlePath !== null ) {
		curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
		curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
	}
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

	// Append the 'query' parameters to whatever query string the URL already has
	$url = $req['url'];
	$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
	if ( $query != '' ) {
		$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
	}
	curl_setopt( $ch, CURLOPT_URL, $url );
	curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
	curl_setopt( $ch, CURLOPT_NOBODY, ( $req['method'] === 'HEAD' ) );
	curl_setopt( $ch, CURLOPT_HTTP_VERSION, $opts['httpVersion'] ?? CURL_HTTP_VERSION_NONE );

	if ( $req['method'] === 'PUT' ) {
		curl_setopt( $ch, CURLOPT_PUT, 1 );
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) ) {
			curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
			// "chunked" is the transfer coding token defined by HTTP. The historical
			// value "chunks" is still accepted so that existing callers keep working.
			$transferEncoding = $req['headers']['transfer-encoding'] ?? null;
			$isChunked = is_string( $transferEncoding ) &&
				in_array( strtolower( trim( $transferEncoding ) ), [ 'chunked', 'chunks' ], true );
			if ( isset( $req['headers']['content-length'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
			} elseif ( $isChunked ) {
				curl_setopt( $ch, CURLOPT_UPLOAD, true );
			} else {
				throw new InvalidArgumentException( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
			}
		} elseif ( $req['body'] !== '' ) {
			// Wrap the string body in a temporary stream that curl can read from
			$fp = fopen( "php://temp", "wb+" );
			fwrite( $fp, $req['body'], strlen( $req['body'] ) );
			rewind( $fp );
			curl_setopt( $ch, CURLOPT_INFILE, $fp );
			curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
			$req['_closeHandle'] = $fp; // remember to close this later
		} else {
			curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
		}
		curl_setopt( $ch, CURLOPT_READFUNCTION,
			static function ( $ch, $fd, $length ) {
				return (string)fread( $fd, $length );
			}
		);
	} elseif ( $req['method'] === 'POST' ) {
		curl_setopt( $ch, CURLOPT_POST, 1 );
		curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
	} else {
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
			throw new InvalidArgumentException( "HTTP body specified for a non PUT/POST request." );
		}
		$req['headers']['content-length'] = 0;
	}

	if ( !isset( $req['headers']['user-agent'] ) ) {
		$req['headers']['user-agent'] = $this->userAgent;
	}

	// Flatten the header map into the "Name: value" lines curl expects
	$headers = [];
	foreach ( $req['headers'] as $name => $value ) {
		if ( strpos( $name, ':' ) !== false ) {
			throw new InvalidArgumentException( "Header name must not contain colon-space." );
		}
		$headers[] = $name . ': ' . trim( $value );
	}
	curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

	curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
		static function ( $ch, $header ) use ( &$req ) {
			if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) {
				header( $header );
			}
			// curl requires the callback to report how many bytes it consumed
			$length = strlen( $header );
			$matches = [];
			if ( preg_match( "/^(HTTP\/(?:1\.[01]|2)) (\d{3}) (.*)/", $header, $matches ) ) {
				$req['response']['code'] = (int)$matches[2];
				$req['response']['reason'] = trim( $matches[3] );
				// After a redirect we will receive this again, but we already stored headers
				// that belonged to a redirect response. Start over.
				$req['response']['headers'] = [];
				return $length;
			}
			if ( strpos( $header, ":" ) === false ) {
				return $length;
			}
			[ $name, $value ] = explode( ":", $header, 2 );
			$name = strtolower( $name );
			$value = trim( $value );
			// Repeated headers are folded into one comma-separated value
			if ( isset( $req['response']['headers'][$name] ) ) {
				$req['response']['headers'][$name] .= ', ' . $value;
			} else {
				$req['response']['headers'][$name] = $value;
			}
			return $length;
		}
	);

	// This works with both file and php://temp handles (unlike CURLOPT_FILE)
	$hasOutputStream = isset( $req['stream'] );
	curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
		static function ( $ch, $data ) use ( &$req, $hasOutputStream ) {
			if ( $hasOutputStream ) {
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				return fwrite( $req['stream'], $data );
			} else {
				// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$req['response']['body'] .= $data;

				return strlen( $data );
			}
		}
	);

	return $ch;
}
487 | |
/**
 * Get the shared curl multi handle, creating it on first use, and apply the
 * per-call connection limit and pipelining options to it
 *
 * @param array $opts
 *   - maxConnsPerHost : overrides the instance-level per-host connection limit
 *   - usePipelining   : overrides the instance-level pipelining setting
 * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintReturn
 * @return resource|object
 * @throws Exception
 */
protected function getCurlMulti( array $opts ) {
	if ( !$this->cmh ) {
		$multiHandle = curl_multi_init();
		// Limit the size of the idle connection cache such that consecutive parallel
		// request batches to the same host can avoid having to keep making connections
		curl_multi_setopt( $multiHandle, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
		$this->cmh = $multiHandle;
	}

	$libVersion = curl_version()['version'];

	// CURLMOPT_MAX_HOST_CONNECTIONS is available since PHP 7.0.7 and cURL 7.30.0
	if ( version_compare( $libVersion, '7.30.0', '>=' ) ) {
		// Limit the number of in-flight requests for any given host
		$hostLimit = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;
		curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$hostLimit );
	}

	$wantPipelining = $opts['usePipelining'] ?? $this->usePipelining;
	if ( !$wantPipelining ) {
		return $this->cmh;
	}

	if ( version_compare( $libVersion, '7.62', '>=' ) ) {
		// The option is a bitfield but HTTP/1.x pipelining has been removed
		$mode = CURLPIPE_MULTIPLEX;
	} elseif ( version_compare( $libVersion, '7.43', '>=' ) ) {
		// The option is a bitfield and HTTP/1.x pipelining is supported
		$mode = CURLPIPE_HTTP1 | CURLPIPE_MULTIPLEX;
	} else {
		// The option is a boolean
		$mode = 1;
	}
	// Suppress deprecation, we know already (T264735)
	// phpcs:ignore Generic.PHP.NoSilencedErrors
	@curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, $mode );

	return $this->cmh;
}
530 | |
/**
 * Read a timing value from a curl handle as a string of seconds
 *
 * When the constant named by $newConstName is defined, the value it yields is divided
 * by 1e6 and formatted with six decimal places. Otherwise the value of $oldOption is
 * returned as-is (cast to string).
 *
 * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintParam
 * @param resource|object $ch
 * @param int $oldOption CURLINFO_* option used as the fallback
 * @param string $newConstName Name of the preferred CURLINFO_*_T constant
 * @return string
 */
private function getCurlTime( $ch, $oldOption, $newConstName ): string {
	if ( !defined( $newConstName ) ) {
		return (string)curl_getinfo( $ch, $oldOption );
	}

	$microseconds = curl_getinfo( $ch, constant( $newConstName ) );

	return sprintf( "%.6F", $microseconds / 1e6 );
}
548 | |
/**
 * Execute a set of HTTP(S) requests one after another, without curl multi.
 *
 * Each request is sent through an MWHttpRequest obtained from the HttpRequestFactory
 * service, and the outcome is converted into the same 'response' map shape that
 * the concurrent implementation produces.
 *
 * @see MultiHttpClient::runMulti()
 * @todo Remove dependency on MediaWikiServices: rewrite using Guzzle T202352
 * @param array $reqs Map of HTTP request arrays
 * @phpcs:ignore Generic.Files.LineLength
 * @phan-param array<int,array{url:string,query:array,method:string,body:string,headers:array<string,string>,proxy?:?string}> $reqs
 * @param array $opts
 *   - connTimeout     : connection timeout per request (seconds)
 *   - reqTimeout      : post-connection timeout per request (seconds)
 * @phan-param array{connTimeout:int,reqTimeout:int} $opts
 * @return array $reqs With response array populated for each
 * @throws Exception
 */
private function runMultiHttp( array $reqs, array $opts = [] ) {
	// Options shared by every request in the batch
	$sharedOptions = [
		'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
		'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout,
		'logger' => $this->logger,
		'caInfo' => $this->caBundlePath,
	];

	foreach ( $reqs as $index => $req ) {
		$reqOptions = $sharedOptions;
		$reqOptions += [
			'method' => $req['method'],
			'proxy' => $req['proxy'] ?? $this->proxy,
			'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent,
			'postData' => $req['body'],
		];

		// Append the 'query' parameters to whatever query string the URL already has
		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query != '' ) {
			$separator = strpos( $req['url'], '?' ) === false ? '?' : '&';
			$url .= $separator . $query;
		}

		$httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
			$url, $reqOptions, __METHOD__ );
		$httpRequest->setLogger( $this->logger );
		foreach ( $req['headers'] as $header => $value ) {
			$httpRequest->setHeader( $header, $value );
		}
		$sv = $httpRequest->execute()->getStatusValue();

		// Collapse each header's list of values into one comma-separated string
		$respHeaders = [];
		foreach ( $httpRequest->getResponseHeaders() as $headerName => $headerValues ) {
			$respHeaders[$headerName] = implode( ', ', $headerValues );
		}

		$response = [
			'code' => $httpRequest->getStatus(),
			'reason' => '',
			'headers' => $respHeaders,
			'body' => $httpRequest->getContent(),
			'error' => '',
		];

		$firstError = $sv->isOK() ? null : ( $sv->getErrors()[0] ?? null );
		if ( $firstError !== null ) {
			$response['error'] = $firstError['message'];

			// param values vary per failure type (ex. unknown host vs unknown page)
			$firstParam = $firstError['params'][0] ?? null;
			if ( $firstParam !== null ) {
				if ( !is_numeric( $firstParam ) ) {
					$response['reason'] = $firstParam;
				} elseif ( isset( $firstError['params'][1] ) ) {
					// @phan-suppress-next-line PhanTypeInvalidDimOffset
					$response['reason'] = $firstError['params'][1];
				}
			}
		}

		// Integer-indexed copies, for convenience with array destructuring
		foreach ( [ 'code', 'reason', 'headers', 'body', 'error' ] as $position => $field ) {
			$response[$position] = $response[$field];
		}

		$reqs[$index]['response'] = $response;
	}

	return $reqs;
}
635 | |
/**
 * Lower-case all header names of a header map
 *
 * If several names only differ in case, the value of the last one is kept.
 *
 * @param array $headers Header name/value map
 * @return array The same map keyed by lower-case header names
 */
private function normalizeHeaders( array $headers ): array {
	return array_change_key_case( $headers, CASE_LOWER );
}
648 | |
/**
 * Bring every request map into canonical form, in place
 *
 * For each request this resets 'response' to an empty response map, converts the
 * short-form integer keys 0/1 into 'method'/'url', reroutes local URLs through the
 * configured reverse proxy, fills in defaults for 'query', 'headers', 'body' and
 * 'flags', and logs the start of the request with sensitive headers redacted.
 *
 * @param array[] &$reqs the requests to normalize
 * @throws InvalidArgumentException If a request has no method or no URL
 */
private function normalizeRequests( array &$reqs ) {
	foreach ( $reqs as &$req ) {
		$req['response'] = [
			'code' => 0,
			'reason' => '',
			'headers' => [],
			'body' => '',
			'error' => ''
		];

		// Expand the short form: index 0 is the method, index 1 is the URL
		foreach ( [ 0 => 'method', 1 => 'url' ] as $position => $field ) {
			if ( isset( $req[$position] ) ) {
				$req[$field] = $req[$position];
				unset( $req[$position] );
			}
		}

		if ( !isset( $req['method'] ) ) {
			throw new InvalidArgumentException( "Request has no 'method' field set." );
		}
		if ( !isset( $req['url'] ) ) {
			throw new InvalidArgumentException( "Request has no 'url' field set." );
		}

		if ( $this->localProxy !== false && $this->isLocalURL( $req['url'] ) ) {
			$this->useReverseProxy( $req, $this->localProxy );
		}

		if ( !isset( $req['query'] ) ) {
			$req['query'] = [];
		}

		// Precedence, lowest to highest: client defaults, telemetry, per-request headers
		$telemetryHeaders = $this->telemetry ? $this->telemetry->getRequestHeaders() : [];
		$requestHeaders = $req['headers'] ?? [];
		$req['headers'] = $this->normalizeHeaders(
			array_merge( $this->headers, $telemetryHeaders, $requestHeaders )
		);

		if ( !isset( $req['body'] ) ) {
			$req['body'] = '';
			$req['headers']['content-length'] = 0;
		}

		// Redact some headers we know to have tokens before logging them
		$logHeaders = [];
		foreach ( $req['headers'] as $header => $value ) {
			$isSensitive = preg_match( self::SENSITIVE_HEADERS, $header ) === 1;
			$logHeaders[$header] = $isSensitive ? '[redacted]' : $value;
		}
		$this->logger->debug( "HTTP start: {method} {url}",
			[
				'method' => $req['method'],
				'url' => $req['url'],
				'headers' => $logHeaders,
			]
		);

		if ( !isset( $req['flags'] ) ) {
			$req['flags'] = [];
		}
	}
}
709 | |
/**
 * Rewrite a request so that it is sent to a reverse proxy
 *
 * The original host is moved into the 'Host' header, the scheme, host and port of the
 * request URL are replaced by those of the proxy URL, and use of any other HTTP proxy
 * is disabled for this request.
 *
 * @param array &$req HTTP request map, modified in place
 * @param string $proxy URL of the reverse proxy
 * @throws InvalidArgumentException If either the proxy URL or the request URL cannot be parsed
 */
private function useReverseProxy( array &$req, $proxy ) {
	$proxyParts = wfParseUrl( $proxy );
	if ( $proxyParts === false ) {
		throw new InvalidArgumentException( "Invalid reverseProxy configured: $proxy" );
	}

	$targetParts = wfParseUrl( $req['url'] );
	if ( $targetParts === false ) {
		throw new InvalidArgumentException( "Invalid url specified: {$req['url']}" );
	}

	// Set the current host in the Host header
	$req['headers']['Host'] = $targetParts['host'];

	// Replace scheme, host and port in the request
	$targetParts['scheme'] = $proxyParts['scheme'];
	$targetParts['host'] = $proxyParts['host'];
	if ( !isset( $proxyParts['port'] ) ) {
		unset( $targetParts['port'] );
	} else {
		$targetParts['port'] = $proxyParts['port'];
	}
	$req['url'] = wfAssembleUrl( $targetParts );

	// Explicitly disable use of another proxy by setting to false,
	// since null will fallback to $this->proxy
	$req['proxy'] = false;
}
734 | |
/**
 * Check if the URL can be served by localhost
 *
 * The host of the URL, and each of its parent domains, is looked up in the configured
 * list of local virtual hosts. Only http(s) URLs whose host is followed by a port or
 * a path are recognized.
 *
 * @note this is mostly a copy of MWHttpRequest::isLocalURL()
 * @param string $url Full url to check
 * @return bool
 */
private function isLocalURL( $url ) {
	if ( !$this->localVirtualHosts ) {
		// Shortcut
		return false;
	}

	// Extract host part
	$matches = [];
	if ( !preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
		return false;
	}

	// Split up dotwise, top-level label first
	$domainParts = array_reverse( explode( '.', $matches[1] ) );

	// Check if this domain or any superdomain is listed as a local virtual host
	$domain = '';
	foreach ( $domainParts as $i => $domainPart ) {
		$domain = $i === 0 ? $domainPart : $domainPart . '.' . $domain;

		// Strict comparison: a loose one compares numeric-looking strings by value,
		// so e.g. host "1e3" would wrongly match a configured "1000"
		if ( in_array( $domain, $this->localVirtualHosts, true ) ) {
			return true;
		}
	}

	return false;
}
775 | |
/**
 * Work out how long curl_multi_select() may block for the given options.
 *
 * The result is the smallest non-zero timeout scaled by TIMEOUT_ACCURACY_FACTOR,
 * but never less than 10 microseconds. If both timeouts are zero/unset, 1 is returned.
 *
 * @param array $opts
 *   - connTimeout : connection timeout (seconds), defaults to the instance setting
 *   - reqTimeout  : request timeout (seconds), defaults to the instance setting
 * @return float
 */
private function getSelectTimeout( $opts ) {
	$configured = [
		$opts['connTimeout'] ?? $this->connTimeout,
		$opts['reqTimeout'] ?? $this->reqTimeout,
	];

	// Ignore timeouts that are disabled (falsy)
	$effective = [];
	foreach ( $configured as $timeout ) {
		if ( $timeout ) {
			$effective[] = $timeout;
		}
	}
	if ( !$effective ) {
		return 1;
	}

	$scaled = min( $effective ) * self::TIMEOUT_ACCURACY_FACTOR;

	// Minimum 10us
	return $scaled < 10e-6 ? 10e-6 : $scaled;
}
797 | |
/**
 * Replace the logger used for debug and warning messages
 *
 * @param LoggerInterface $logger Logger that receives all subsequent log output
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
806 | |
/**
 * Close the shared curl multi handle, if one was ever created
 */
public function __destruct() {
	if ( !$this->cmh ) {
		return;
	}

	curl_multi_close( $this->cmh );
	$this->cmh = null;
}
813 | |
814 | } |