Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
80.47% |
305 / 379 |
|
29.41% |
5 / 17 |
CRAP | |
0.00% |
0 / 1 |
| MultiHttpClient | |
80.69% |
305 / 378 |
|
29.41% |
5 / 17 |
204.97 | |
0.00% |
0 / 1 |
| __construct | |
76.92% |
10 / 13 |
|
0.00% |
0 / 1 |
5.31 | |||
| run | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| runMulti | |
68.00% |
17 / 25 |
|
0.00% |
0 / 1 |
18.54 | |||
| isCurlEnabled | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
| runMultiCurl | |
77.33% |
58 / 75 |
|
0.00% |
0 / 1 |
13.68 | |||
| getCurlHandle | |
64.77% |
57 / 88 |
|
0.00% |
0 / 1 |
43.16 | |||
| getCurlMulti | |
87.50% |
14 / 16 |
|
0.00% |
0 / 1 |
6.07 | |||
| getCurlTime | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| runMultiHttp | |
97.92% |
47 / 48 |
|
0.00% |
0 / 1 |
10 | |||
| normalizeHeaders | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| normalizeRequests | |
93.02% |
40 / 43 |
|
0.00% |
0 / 1 |
12.05 | |||
| useReverseProxy | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
4.16 | |||
| assembleUrl | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
10 | |||
| isLocalURL | |
94.12% |
16 / 17 |
|
0.00% |
0 / 1 |
6.01 | |||
| getSelectTimeout | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
3.10 | |||
| setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| __destruct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * HTTP service client |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | */ |
| 8 | |
| 9 | namespace Wikimedia\Http; |
| 10 | |
| 11 | use InvalidArgumentException; |
| 12 | use MediaWiki\MediaWikiServices; |
| 13 | use Psr\Log\LoggerAwareInterface; |
| 14 | use Psr\Log\LoggerInterface; |
| 15 | use Psr\Log\NullLogger; |
| 16 | use RuntimeException; |
| 17 | |
| 18 | /** |
| 19 | * Class to handle multiple HTTP requests |
| 20 | * |
| 21 | * If curl is available, requests will be made concurrently. |
| 22 | * Otherwise, they will be made serially. |
| 23 | * |
| 24 | * HTTP request maps are arrays that use the following format: |
| 25 | * - method : GET/HEAD/PUT/POST/DELETE |
| 26 | * - url : HTTP/HTTPS URL |
| 27 | * - query : <query parameter field/value associative array> (uses RFC 3986) |
| 28 | * - headers : <header name/value associative array> |
| 29 | * - body : source to get the HTTP request body from; |
| 30 | * this can simply be a string (always), a resource for |
| 31 | * PUT requests, and a field/value array for POST request; |
| 32 | * array bodies are encoded as multipart/form-data and strings |
| 33 | * use application/x-www-form-urlencoded (headers sent automatically) |
| 34 | * - stream : resource to stream the HTTP response body to |
| 35 | * - proxy : HTTP proxy to use |
| 36 | * - flags : map of boolean flags which supports: |
| 37 | * - relayResponseHeaders : write out header via header() |
| 38 | * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'. |
| 39 | * |
| 40 | * Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get |
| 41 | * a client object with appropriately configured timeouts. |
| 42 | * |
| 43 | * @since 1.23 |
| 44 | */ |
| 45 | class MultiHttpClient implements LoggerAwareInterface { |
| 46 | /** Regex for headers likely to contain tokens, etc. that we want to redact from logs */ |
| 47 | private const SENSITIVE_HEADERS = '/(^|-|_)(authorization|auth|password|cookie)($|-|_)/'; |
| 48 | /** |
| 49 | * @phpcs:ignore MediaWiki.Commenting.PropertyDocumentation.ObjectTypeHintVar |
| 50 | * @var resource|object|null curl_multi_init() handle, initialized in getCurlMulti() |
| 51 | */ |
| 52 | protected $cmh = null; |
| 53 | /** @var string|null SSL certificates path */ |
| 54 | protected $caBundlePath; |
| 55 | /** @var float */ |
| 56 | protected $connTimeout = 10; |
| 57 | /** @var float */ |
| 58 | protected $maxConnTimeout = INF; |
| 59 | /** @var float */ |
| 60 | protected $reqTimeout = 30; |
| 61 | /** @var float */ |
| 62 | protected $maxReqTimeout = INF; |
| 63 | /** @var bool */ |
| 64 | protected $usePipelining = false; |
| 65 | /** @var int */ |
| 66 | protected $maxConnsPerHost = 50; |
| 67 | /** @var string|null */ |
| 68 | protected $proxy; |
| 69 | /** @var string|false */ |
| 70 | protected $localProxy = false; |
| 71 | /** @var string[] */ |
| 72 | protected $localVirtualHosts = []; |
| 73 | /** @var string */ |
| 74 | protected $userAgent = 'wikimedia/multi-http-client v1.1'; |
| 75 | /** @var LoggerInterface */ |
| 76 | protected $logger; |
| 77 | protected array $headers = []; |
| 78 | |
| 79 | // In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect |
| 80 | // timeouts are periodically polled instead of being accurately respected. |
| 81 | // The select timeout is set to the minimum timeout multiplied by this factor. |
| 82 | private const TIMEOUT_ACCURACY_FACTOR = 0.1; |
| 83 | |
| 84 | private ?TelemetryHeadersInterface $telemetry = null; |
| 85 | |
| 86 | /** |
| 87 | * Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get |
| 88 | * a client object with appropriately configured timeouts instead of constructing |
| 89 | * a MultiHttpClient directly. |
| 90 | * |
| 91 | * @param array $options |
| 92 | * - connTimeout : default connection timeout (seconds) |
| 93 | * - reqTimeout : default request timeout (seconds) |
| 94 | * - maxConnTimeout : maximum connection timeout (seconds) |
| 95 | * - maxReqTimeout : maximum request timeout (seconds) |
| 96 | * - proxy : HTTP proxy to use |
| 97 | * - localProxy : Reverse proxy to use for domains in localVirtualHosts |
| 98 | * - localVirtualHosts : Domains that are configured as virtual hosts on the same machine |
| 99 | * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) |
| 100 | * - maxConnsPerHost : maximum number of concurrent connections (per host) |
| 101 | * - userAgent : The User-Agent header value to send |
| 102 | * - logger : a \Psr\Log\LoggerInterface instance for debug logging |
| 103 | * - caBundlePath : path to specific Certificate Authority bundle (if any) |
| 104 | * - headers : an array of default headers to send with every request |
| 105 | * - telemetry : a \Wikimedia\Http\TelemetryHeadersInterface instance whose request headers are sent with every request
| 106 | * @throws \Exception |
| 107 | */ |
| 108 | public function __construct( array $options ) {
| 109 | if ( isset( $options['caBundlePath'] ) ) {
| 110 | $this->caBundlePath = $options['caBundlePath'];
| 111 | if ( !file_exists( $this->caBundlePath ) ) { // fail fast on a missing CA bundle
| 112 | throw new InvalidArgumentException( "Cannot find CA bundle: " . $this->caBundlePath );
| 113 | }
| 114 | }
| 115 | static $opts = [ // option names copied verbatim onto the same-named properties
| 116 | 'connTimeout', 'maxConnTimeout', 'reqTimeout', 'maxReqTimeout',
| 117 | 'usePipelining', 'maxConnsPerHost', 'proxy', 'userAgent', 'logger',
| 118 | 'localProxy', 'localVirtualHosts', 'headers', 'telemetry'
| 119 | ];
| 120 | foreach ( $opts as $key ) {
| 121 | if ( isset( $options[$key] ) ) { // absent or null options keep the property default
| 122 | $this->$key = $options[$key];
| 123 | }
| 124 | }
| 125 | $this->logger ??= new NullLogger; // fall back to a no-op logger when none was supplied
| 126 | }
| 127 | |
| 128 | /** |
| 129 | * Execute an HTTP(S) request |
| 130 | * |
| 131 | * This method returns a response map of: |
| 132 | * - code : HTTP response code or 0 if there was a serious error |
| 133 | * - reason : HTTP response reason (empty if there was a serious error) |
| 134 | * - headers : <header name/value associative array> |
| 135 | * - body : HTTP response body or resource (if "stream" was set) |
| 136 | * - error : Any error string |
| 137 | * The map also stores integer-indexed copies of these values. This lets callers do: |
| 138 | * @code |
| 139 | * [ $rcode, $rdesc, $rhdrs, $rbody, $rerr ] = $http->run( $req ); |
| 140 | * @endcode |
| 141 | * @param array $req HTTP request array |
| 142 | * @param array $opts |
| 143 | * - connTimeout : connection timeout per request (seconds) |
| 144 | * - reqTimeout : post-connection timeout per request (seconds) |
| 145 | * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) |
| 146 | * - maxConnsPerHost : maximum number of concurrent connections (per host) |
| 147 | * - httpVersion : One of 'v1.0', 'v1.1', 'v2', 'v2.0', 'v3' or 'v3.0'. Leave empty to use |
| 148 | * PHP/curl's default |
| 149 | * @param string $caller The method making this request, for attribution in logs |
| 150 | * @return array Response array for request |
| 151 | */ |
| 152 | public function run( array $req, array $opts = [], string $caller = __METHOD__ ) {
| 153 | return $this->runMulti( [ $req ], $opts, $caller )[0]['response']; // batch of one; unwrap its response map
| 154 | }
| 155 | |
| 156 | /** |
| 157 | * Execute a set of HTTP(S) requests. |
| 158 | * |
| 159 | * If curl is available, requests will be made concurrently. |
| 160 | * Otherwise, they will be made serially. |
| 161 | * |
| 162 | * The maps are returned by this method with the 'response' field set to a map of: |
| 163 | * - code : HTTP response code or 0 if there was a serious error |
| 164 | * - reason : HTTP response reason (empty if there was a serious error) |
| 165 | * - headers : <header name/value associative array> |
| 166 | * - body : HTTP response body or resource (if "stream" was set) |
| 167 | * - error : Any error string |
| 168 | * The map also stores integer-indexed copies of these values. This lets callers do: |
| 169 | * @code |
| 170 | * [ $rcode, $rdesc, $rhdrs, $rbody, $rerr ] = $req['response']; |
| 171 | * @endcode |
| 172 | * All headers in the 'headers' field are normalized to use lower case names. |
| 173 | * This is true for the request headers and the response headers. Integer-indexed |
| 174 | * method/URL entries will also be changed to use the corresponding string keys. |
| 175 | * |
| 176 | * @param array[] $reqs Map of HTTP request arrays |
| 177 | * @param array $opts Options |
| 178 | * - connTimeout : connection timeout per request (seconds) |
| 179 | * - reqTimeout : post-connection timeout per request (seconds) |
| 180 | * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) |
| 181 | * - maxConnsPerHost : maximum number of concurrent connections (per host) |
| 182 | * - httpVersion : One of 'v1.0', 'v1.1', 'v2', 'v2.0', 'v3' or 'v3.0'. Leave empty to use |
| 183 | * PHP/curl's default |
| 184 | * @param string $caller The method making these requests, for attribution in logs |
| 185 | * @return array[] $reqs With response array populated for each |
| 186 | * @throws \Exception |
| 187 | */ |
| 188 | public function runMulti( array $reqs, array $opts = [], string $caller = __METHOD__ ) {
| 189 | $this->normalizeRequests( $reqs );
| 190 | $opts += [ 'connTimeout' => $this->connTimeout, 'reqTimeout' => $this->reqTimeout ]; // per-call options win over client defaults
| 191 | 
| 192 | if ( $this->maxConnTimeout && $opts['connTimeout'] > $this->maxConnTimeout ) { // clamp to the configured maximum
| 193 | $opts['connTimeout'] = $this->maxConnTimeout;
| 194 | }
| 195 | if ( $this->maxReqTimeout && $opts['reqTimeout'] > $this->maxReqTimeout ) { // clamp to the configured maximum
| 196 | $opts['reqTimeout'] = $this->maxReqTimeout;
| 197 | }
| 198 | 
| 199 | if ( $this->isCurlEnabled() ) {
| 200 | switch ( $opts['httpVersion'] ?? null ) { // translate the version label into the matching cURL constant
| 201 | case 'v1.0':
| 202 | $opts['httpVersion'] = CURL_HTTP_VERSION_1_0;
| 203 | break;
| 204 | case 'v1.1':
| 205 | $opts['httpVersion'] = CURL_HTTP_VERSION_1_1;
| 206 | break;
| 207 | case 'v2':
| 208 | case 'v2.0':
| 209 | $opts['httpVersion'] = CURL_HTTP_VERSION_2_0;
| 210 | break;
| 211 | case 'v3':
| 212 | case 'v3.0': // the HTTP/3 constant is not defined on older PHP builds; fall back to cURL's default there
| 213 | $opts['httpVersion'] = defined( 'CURL_HTTP_VERSION_3' ) ? constant( 'CURL_HTTP_VERSION_3' ) : CURL_HTTP_VERSION_NONE;
| 214 | break;
| 215 | default: // unset or unrecognised label: let cURL choose the version
| 216 | $opts['httpVersion'] = CURL_HTTP_VERSION_NONE;
| 217 | }
| 218 | return $this->runMultiCurl( $reqs, $opts, $caller );
| 219 | } else {
| 220 | # TODO: Add handling for httpVersion option
| 221 | return $this->runMultiHttp( $reqs, $opts );
| 222 | }
| 223 | }
| 224 | |
| 225 | /** |
| 226 | * Determines if the curl extension is available |
| 227 | * |
| 228 | * @return bool true if curl is available, false otherwise. |
| 229 | */ |
| 230 | protected function isCurlEnabled() {
| 231 | // Check for curl_multi_init() in addition to the extension itself, as some users'
| 232 | // hosts provide them with a modified curl with the curl_multi* functions removed(!)
| 233 | return extension_loaded( 'curl' ) && function_exists( 'curl_multi_init' );
| 234 | }
| 235 | |
| 236 | /** |
| 237 | * Execute a set of HTTP(S) requests concurrently |
| 238 | * |
| 239 | * @see MultiHttpClient::runMulti() |
| 240 | * |
| 241 | * @param array[] $reqs Map of HTTP request arrays |
| 242 | * @param array $opts |
| 243 | * - connTimeout : connection timeout per request (seconds) |
| 244 | * - reqTimeout : post-connection timeout per request (seconds) |
| 245 | * - usePipelining : whether to use HTTP pipelining if possible |
| 246 | * - maxConnsPerHost : maximum number of concurrent connections (per host) |
| 247 | * - httpVersion : CURL_HTTP_VERSION_* constant selecting the HTTP version to use
| 248 | * @phan-param array{connTimeout?:int,reqTimeout?:int,usePipelining?:bool,maxConnsPerHost?:int} $opts |
| 249 | * @param string $caller The method making these requests, for attribution in logs |
| 250 | * @return array $reqs With response array populated for each |
| 251 | * @throws \Exception |
| 252 | * @suppress PhanTypeInvalidDimOffset |
| 253 | */ |
| 254 | private function runMultiCurl( array $reqs, array $opts, string $caller = __METHOD__ ) {
| 255 | $chm = $this->getCurlMulti( $opts ); // multi handle cached on this client by getCurlMulti()
| 256 | 
| 257 | $selectTimeout = $this->getSelectTimeout( $opts );
| 258 | 
| 259 | // Create one cURL handle per request and attach it to the multi handle...
| 260 | $handles = [];
| 261 | foreach ( $reqs as $index => &$req ) { // by reference: getCurlHandle() binds its callbacks to $req
| 262 | $handles[$index] = $this->getCurlHandle( $req, $opts );
| 263 | curl_multi_add_handle( $chm, $handles[$index] );
| 264 | }
| 265 | unset( $req ); // break the reference so $req is not assigned over by accident
| 266 | 
| 267 | $infos = [];
| 268 | // Execute the cURL handles concurrently...
| 269 | $active = null; // handles still being processed
| 270 | do {
| 271 | // Do any available work...
| 272 | $mrc = curl_multi_exec( $chm, $active );
| 273 | 
| 274 | if ( $mrc !== CURLM_OK ) {
| 275 | $error = curl_multi_strerror( $mrc );
| 276 | $this->logger->error( 'curl_multi_exec() failed: {error}', [
| 277 | 'error' => $error,
| 278 | 'exception' => new RuntimeException(),
| 279 | 'method' => $caller,
| 280 | ] );
| 281 | break; // stop driving transfers; per-request errors are filled in below
| 282 | }
| 283 | 
| 284 | // Wait (if possible) for available work...
| 285 | if ( $active > 0 && curl_multi_select( $chm, $selectTimeout ) === -1 ) {
| 286 | $errno = curl_multi_errno( $chm );
| 287 | $error = curl_multi_strerror( $errno );
| 288 | $this->logger->error( 'curl_multi_select() failed: {error}', [
| 289 | 'error' => $error,
| 290 | 'exception' => new RuntimeException(),
| 291 | 'method' => $caller,
| 292 | ] );
| 293 | }
| 294 | } while ( $active > 0 );
| 295 | 
| 296 | $queuedMessages = null; // count of messages still queued, set by curl_multi_info_read()
| 297 | do {
| 298 | $info = curl_multi_info_read( $chm, $queuedMessages );
| 299 | if ( $info !== false && $info['msg'] === CURLMSG_DONE ) {
| 300 | // Note: cast to integer even works on PHP 8.0+ despite the
| 301 | // handle being an object not a resource, because CurlHandle
| 302 | // has a backwards-compatible cast_object handler.
| 303 | $infos[(int)$info['handle']] = $info;
| 304 | }
| 305 | } while ( $queuedMessages > 0 );
| 306 | 
| 307 | // Remove all of the added cURL handles and check for errors...
| 308 | foreach ( $reqs as $index => &$req ) {
| 309 | $ch = $handles[$index];
| 310 | curl_multi_remove_handle( $chm, $ch );
| 311 | 
| 312 | if ( isset( $infos[(int)$ch] ) ) {
| 313 | $info = $infos[(int)$ch];
| 314 | $errno = $info['result'];
| 315 | if ( $errno !== 0 ) {
| 316 | $req['response']['error'] = "(curl error: $errno)";
| 317 | if ( function_exists( 'curl_strerror' ) ) {
| 318 | $req['response']['error'] .= " " . curl_strerror( $errno );
| 319 | }
| 320 | $this->logger->error( 'Error fetching URL "{url}": {error}', [
| 321 | 'url' => $req['url'],
| 322 | 'error' => $req['response']['error'],
| 323 | 'exception' => new RuntimeException(),
| 324 | 'method' => $caller,
| 325 | ] );
| 326 | } else {
| 327 | $this->logger->debug(
| 328 | "HTTP complete: {method} {url} code={response_code} size={size} " .
| 329 | "total={total_time} connect={connect_time}",
| 330 | [
| 331 | 'method' => $req['method'],
| 332 | 'url' => $req['url'],
| 333 | 'response_code' => $req['response']['code'],
| 334 | 'size' => curl_getinfo( $ch, CURLINFO_SIZE_DOWNLOAD ),
| 335 | 'total_time' => $this->getCurlTime(
| 336 | $ch, CURLINFO_TOTAL_TIME, 'CURLINFO_TOTAL_TIME_T'
| 337 | ),
| 338 | 'connect_time' => $this->getCurlTime(
| 339 | $ch, CURLINFO_CONNECT_TIME, 'CURLINFO_CONNECT_TIME_T'
| 340 | ),
| 341 | ]
| 342 | );
| 343 | }
| 344 | } else {
| 345 | $req['response']['error'] = "(curl error: no status set)"; // no CURLMSG_DONE message was read for this handle
| 346 | }
| 347 | 
| 348 | // Integer-indexed copies, for convenience with array destructuring
| 349 | $req['response'][0] = $req['response']['code'];
| 350 | $req['response'][1] = $req['response']['reason'];
| 351 | $req['response'][2] = $req['response']['headers'];
| 352 | $req['response'][3] = $req['response']['body'];
| 353 | $req['response'][4] = $req['response']['error'];
| 354 | // Close any php://temp handle that getCurlHandle() opened for a string PUT body
| 355 | if ( isset( $req['_closeHandle'] ) ) {
| 356 | fclose( $req['_closeHandle'] );
| 357 | unset( $req['_closeHandle'] );
| 358 | }
| 359 | }
| 360 | unset( $req ); // break the reference so $req is not assigned over by accident
| 361 | 
| 362 | return $reqs;
| 363 | }
| 364 | |
| 365 | /** |
| 366 | * @param array &$req HTTP request map |
| 367 | * @phpcs:ignore Generic.Files.LineLength |
| 368 | * @phan-param array{url:string,proxy?:?string,query:mixed,method:string,body:string|resource,headers:array<string,string>,stream?:resource,flags:array} $req |
| 369 | * @param array $opts |
| 370 | * - connTimeout : default connection timeout |
| 371 | * - reqTimeout : default request timeout |
| 372 | * - httpVersion : CURL_HTTP_VERSION_* constant to use (CURL_HTTP_VERSION_NONE if unset)
| 373 | * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintReturn |
| 374 | * @return resource|object |
| 375 | * @throws \Exception |
| 376 | */ |
| 377 | protected function getCurlHandle( array &$req, array $opts ) {
| 378 | $ch = curl_init();
| 379 | 
| 380 | curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy ); // per-request proxy wins over the client default
| 381 | curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, intval( $opts['connTimeout'] * 1e3 ) ); // seconds to milliseconds
| 382 | curl_setopt( $ch, CURLOPT_TIMEOUT_MS, intval( $opts['reqTimeout'] * 1e3 ) ); // seconds to milliseconds
| 383 | curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
| 384 | curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
| 385 | curl_setopt( $ch, CURLOPT_HEADER, 0 );
| 386 | if ( $this->caBundlePath !== null ) {
| 387 | curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
| 388 | curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
| 389 | }
| 390 | curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
| 391 | 
| 392 | $url = $req['url'];
| 393 | $query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
| 394 | if ( $query != '' ) {
| 395 | $url .= !str_contains( $req['url'], '?' ) ? "?$query" : "&$query"; // extend an existing query string if there is one
| 396 | }
| 397 | curl_setopt( $ch, CURLOPT_URL, $url );
| 398 | curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
| 399 | curl_setopt( $ch, CURLOPT_NOBODY, ( $req['method'] === 'HEAD' ) );
| 400 | curl_setopt( $ch, CURLOPT_HTTP_VERSION, $opts['httpVersion'] ?? CURL_HTTP_VERSION_NONE );
| 401 | 
| 402 | if ( $req['method'] === 'PUT' ) {
| 403 | curl_setopt( $ch, CURLOPT_PUT, 1 );
| 404 | // phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
| 405 | if ( is_resource( $req['body'] ) ) {
| 406 | curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
| 407 | if ( isset( $req['headers']['content-length'] ) ) {
| 408 | curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
| 409 | } elseif ( isset( $req['headers']['transfer-encoding'] ) &&
| 410 | in_array( strtolower( trim( $req['headers']['transfer-encoding'] ) ), [ 'chunked', 'chunks' ], true )
| 411 | ) { // the standard value is "chunked"; "chunks" is the legacy spelling, kept for backward compatibility
| 412 | curl_setopt( $ch, CURLOPT_UPLOAD, true );
| 413 | } else {
| 414 | throw new InvalidArgumentException( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
| 415 | }
| 416 | } elseif ( $req['body'] !== '' ) {
| 417 | $fp = fopen( "php://temp", "wb+" ); // wrap the string body in a stream that cURL can read from
| 418 | fwrite( $fp, $req['body'], strlen( $req['body'] ) );
| 419 | rewind( $fp );
| 420 | curl_setopt( $ch, CURLOPT_INFILE, $fp );
| 421 | curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
| 422 | $req['_closeHandle'] = $fp; // remember to close this later
| 423 | } else {
| 424 | curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
| 425 | }
| 426 | curl_setopt( $ch, CURLOPT_READFUNCTION,
| 427 | static function ( $ch, $fd, $length ) {
| 428 | return (string)fread( $fd, $length ); // a failed read (false) becomes an empty string
| 429 | }
| 430 | );
| 431 | } elseif ( $req['method'] === 'POST' ) {
| 432 | curl_setopt( $ch, CURLOPT_POST, 1 );
| 433 | curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
| 434 | } else {
| 435 | // phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
| 436 | if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
| 437 | throw new InvalidArgumentException( "HTTP body specified for a non PUT/POST request." );
| 438 | }
| 439 | $req['headers']['content-length'] = 0;
| 440 | }
| 441 | 
| 442 | if ( !isset( $req['headers']['user-agent'] ) ) {
| 443 | $req['headers']['user-agent'] = $this->userAgent;
| 444 | }
| 445 | 
| 446 | $headers = [];
| 447 | foreach ( $req['headers'] as $name => $value ) {
| 448 | if ( str_contains( $name, ':' ) ) { // a colon in the name would corrupt the "name: value" line built below
| 449 | throw new InvalidArgumentException( "Header name must not contain colon-space." );
| 450 | }
| 451 | $headers[] = $name . ': ' . trim( $value );
| 452 | }
| 453 | curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );
| 454 | 
| 455 | curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
| 456 | static function ( $ch, $header ) use ( &$req ) {
| 457 | if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) {
| 458 | header( $header );
| 459 | }
| 460 | $length = strlen( $header ); // the callback returns the header line's byte length on every path
| 461 | $matches = [];
| 462 | if ( preg_match( "/^(HTTP\/(?:1\.[01]|2|3)) (\d{3}) (.*)/", $header, $matches ) ) {
| 463 | $req['response']['code'] = (int)$matches[2];
| 464 | $req['response']['reason'] = trim( $matches[3] );
| 465 | // After a redirect we will receive this again, but we already stored headers
| 466 | // that belonged to a redirect response. Start over.
| 467 | $req['response']['headers'] = [];
| 468 | return $length;
| 469 | }
| 470 | if ( !str_contains( $header, ":" ) ) {
| 471 | return $length;
| 472 | }
| 473 | [ $name, $value ] = explode( ":", $header, 2 );
| 474 | $name = strtolower( $name );
| 475 | $value = trim( $value );
| 476 | if ( isset( $req['response']['headers'][$name] ) ) {
| 477 | $req['response']['headers'][$name] .= ', ' . $value; // repeated headers are joined with ", "
| 478 | } else {
| 479 | $req['response']['headers'][$name] = $value;
| 480 | }
| 481 | return $length;
| 482 | }
| 483 | );
| 484 | 
| 485 | // This works with both file and php://temp handles (unlike CURLOPT_FILE)
| 486 | $hasOutputStream = isset( $req['stream'] );
| 487 | curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
| 488 | static function ( $ch, $data ) use ( &$req, $hasOutputStream ) {
| 489 | if ( $hasOutputStream ) {
| 490 | return fwrite( $req['stream'], $data );
| 491 | } else {
| 492 | $req['response']['body'] .= $data;
| 493 | 
| 494 | return strlen( $data );
| 495 | }
| 496 | }
| 497 | );
| 498 | 
| 499 | return $ch;
| 500 | }
| 501 | |
| 502 | /** |
| 503 | * @param array $opts |
| 504 | * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintReturn |
| 505 | * @return resource|object |
| 506 | * @throws \Exception |
| 507 | */ |
| 508 | protected function getCurlMulti( array $opts ) {
| 509 | if ( !$this->cmh ) { // lazily create the multi handle on first use and keep it on the client
| 510 | $cmh = curl_multi_init();
| 511 | // Limit the size of the idle connection cache such that consecutive parallel
| 512 | // request batches to the same host can avoid having to keep making connections
| 513 | curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
| 514 | $this->cmh = $cmh;
| 515 | }
| 516 | 
| 517 | $curlVersion = curl_version()['version']; // the options set below depend on the cURL version
| 518 | 
| 519 | // CURLMOPT_MAX_HOST_CONNECTIONS is available since PHP 7.0.7 and cURL 7.30.0
| 520 | if ( version_compare( $curlVersion, '7.30.0', '>=' ) ) {
| 521 | // Limit the number of in-flight requests for any given host
| 522 | $maxHostConns = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;
| 523 | curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$maxHostConns );
| 524 | }
| 525 | 
| 526 | if ( $opts['usePipelining'] ?? $this->usePipelining ) { // per-call option overrides the client default
| 527 | if ( version_compare( $curlVersion, '7.43', '<' ) ) {
| 528 | // The option is a boolean
| 529 | $pipelining = 1;
| 530 | } elseif ( version_compare( $curlVersion, '7.62', '<' ) ) {
| 531 | // The option is a bitfield and HTTP/1.x pipelining is supported
| 532 | $pipelining = CURLPIPE_HTTP1 | CURLPIPE_MULTIPLEX;
| 533 | } else {
| 534 | // The option is a bitfield but HTTP/1.x pipelining has been removed
| 535 | $pipelining = CURLPIPE_MULTIPLEX;
| 536 | }
| 537 | // Suppress deprecation, we know already (T264735)
| 538 | // phpcs:ignore Generic.PHP.NoSilencedErrors
| 539 | @curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, $pipelining );
| 540 | }
| 541 | 
| 542 | return $this->cmh;
| 543 | }
| 544 | |
| 545 | /** |
| 546 | * Get a time in seconds, formatted with microsecond resolution when the newer *_T
| 547 | * constant is defined, or fall back to the value of the older option otherwise (PHP 7.2)
| 548 | * |
| 549 | * @phpcs:ignore MediaWiki.Commenting.FunctionComment.ObjectTypeHintParam |
| 550 | * @param resource|object $ch |
| 551 | * @param int $oldOption |
| 552 | * @param string $newConstName |
| 553 | * @return string |
| 554 | */ |
| 555 | private function getCurlTime( $ch, $oldOption, $newConstName ): string {
| 556 | if ( !defined( $newConstName ) ) {
| 557 | return (string)curl_getinfo( $ch, $oldOption ); // older option: value is used as reported
| 558 | }
| 559 | $microseconds = curl_getinfo( $ch, constant( $newConstName ) );
| 560 | return sprintf( "%.6F", $microseconds / 1e6 ); // scale down to seconds, six decimal places
| 561 | }
| 562 | |
| 563 | /** |
| 564 | * Execute a set of HTTP(S) requests sequentially. |
| 565 | * |
| 566 | * @see MultiHttpClient::runMulti() |
| 567 | * @todo Remove dependency on MediaWikiServices: rewrite using Guzzle T202352 |
| 568 | * @param array $reqs Map of HTTP request arrays |
| 569 | * @phpcs:ignore Generic.Files.LineLength |
| 570 | * @phan-param array<int,array{url:string,query:array,method:string,body:string,headers:array<string,string>,proxy?:?string}> $reqs |
| 571 | * @param array $opts |
| 572 | * - connTimeout : connection timeout per request (seconds) |
| 573 | * - reqTimeout : post-connection timeout per request (seconds) |
| 574 | * @phan-param array{connTimeout:int,reqTimeout:int} $opts |
| 575 | * @return array $reqs With response array populated for each |
| 576 | * @throws \Exception |
| 577 | */ |
| 578 | private function runMultiHttp( array $reqs, array $opts = [] ) {
| 579 | $httpOptions = [ // options shared by every request in the batch
| 580 | 'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
| 581 | 'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout,
| 582 | 'logger' => $this->logger,
| 583 | 'caInfo' => $this->caBundlePath,
| 584 | ];
| 585 | foreach ( $reqs as &$req ) { // by reference: the response is stored back onto each request map
| 586 | $reqOptions = $httpOptions + [
| 587 | 'method' => $req['method'],
| 588 | 'proxy' => $req['proxy'] ?? $this->proxy,
| 589 | 'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent,
| 590 | 'postData' => $req['body'],
| 591 | ];
| 592 | 
| 593 | $url = $req['url'];
| 594 | $query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
| 595 | if ( $query != '' ) {
| 596 | $url .= !str_contains( $req['url'], '?' ) ? "?$query" : "&$query"; // extend an existing query string if there is one
| 597 | }
| 598 | 
| 599 | $httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
| 600 | $url, $reqOptions, __METHOD__ );
| 601 | $httpRequest->setLogger( $this->logger );
| 602 | foreach ( $req['headers'] as $header => $value ) {
| 603 | $httpRequest->setHeader( $header, $value );
| 604 | }
| 605 | $sv = $httpRequest->execute()->getStatusValue();
| 606 | 
| 607 | $respHeaders = array_map( // join each header's list of values into one ", "-separated string
| 608 | static fn ( $v ) => implode( ', ', $v ),
| 609 | $httpRequest->getResponseHeaders() );
| 610 | 
| 611 | $req['response'] = [
| 612 | 'code' => $httpRequest->getStatus(),
| 613 | 'reason' => '', // filled in from the status errors below, if any
| 614 | 'headers' => $respHeaders,
| 615 | 'body' => $httpRequest->getContent(),
| 616 | 'error' => '',
| 617 | ];
| 618 | 
| 619 | if ( !$sv->isOK() ) {
| 620 | $svErrors = $sv->getErrors();
| 621 | if ( isset( $svErrors[0] ) ) { // only the first reported error is surfaced
| 622 | $req['response']['error'] = $svErrors[0]['message'];
| 623 | 
| 624 | // param values vary per failure type (ex. unknown host vs unknown page)
| 625 | if ( isset( $svErrors[0]['params'][0] ) ) {
| 626 | if ( is_numeric( $svErrors[0]['params'][0] ) ) {
| 627 | if ( isset( $svErrors[0]['params'][1] ) ) {
| 628 | // @phan-suppress-next-line PhanTypeInvalidDimOffset
| 629 | $req['response']['reason'] = $svErrors[0]['params'][1];
| 630 | }
| 631 | } else {
| 632 | $req['response']['reason'] = $svErrors[0]['params'][0];
| 633 | }
| 634 | }
| 635 | }
| 636 | }
| 637 | 
| 638 | $req['response'][0] = $req['response']['code']; // integer-indexed copies, for array destructuring
| 639 | $req['response'][1] = $req['response']['reason'];
| 640 | $req['response'][2] = $req['response']['headers'];
| 641 | $req['response'][3] = $req['response']['body'];
| 642 | $req['response'][4] = $req['response']['error'];
| 643 | }
| 644 | 
| 645 | return $reqs;
| 646 | }
| 647 | |
| 648 | /** |
| 649 | * Normalize headers array |
| 650 | * @param array $headers |
| 651 | * @return array |
| 652 | */ |
| 653 | private function normalizeHeaders( array $headers ): array {
| 654 | $lowerCased = [];
| 655 | foreach ( array_keys( $headers ) as $headerName ) {
| 656 | $lowerCased[strtolower( $headerName )] = $headers[$headerName]; // later duplicates overwrite earlier ones
| 657 | }
| 658 | return $lowerCased;
| 659 | }
| 660 | |
| 661 | /** |
| 662 | * Normalize request information |
| 663 | * |
| 664 | * @param array[] &$reqs the requests to normalize |
| 665 | */ |
| 666 | private function normalizeRequests( array &$reqs ) {
| 667 | foreach ( $reqs as &$req ) {
| 668 | $req['response'] = [ // start from an empty response so every key exists even on failure
| 669 | 'code' => 0,
| 670 | 'reason' => '',
| 671 | 'headers' => [],
| 672 | 'body' => '',
| 673 | 'error' => ''
| 674 | ];
| 675 | if ( isset( $req[0] ) ) {
| 676 | $req['method'] = $req[0]; // short-form: index 0 is the method
| 677 | unset( $req[0] );
| 678 | }
| 679 | if ( isset( $req[1] ) ) {
| 680 | $req['url'] = $req[1]; // short-form: index 1 is the URL
| 681 | unset( $req[1] );
| 682 | }
| 683 | if ( !isset( $req['method'] ) ) {
| 684 | throw new InvalidArgumentException( "Request has no 'method' field set." );
| 685 | } elseif ( !isset( $req['url'] ) ) {
| 686 | throw new InvalidArgumentException( "Request has no 'url' field set." );
| 687 | }
| 688 | if ( $this->localProxy !== false && $this->isLocalURL( $req['url'] ) ) { // send local URLs through the configured local proxy
| 689 | $this->useReverseProxy( $req, $this->localProxy );
| 690 | }
| 691 | $req['query'] ??= [];
| 692 | $req['headers'] = $this->normalizeHeaders(
| 693 | array_merge( // later sources win: client defaults, then telemetry, then per-request headers
| 694 | $this->headers,
| 695 | $this->telemetry ? $this->telemetry->getRequestHeaders() : [],
| 696 | $req['headers'] ?? []
| 697 | )
| 698 | );
| 699 | 
| 700 | if ( !isset( $req['body'] ) ) {
| 701 | $req['body'] = '';
| 702 | $req['headers']['content-length'] = 0; // no body given, so declare an explicit zero length
| 703 | }
| 704 | // Redact some headers we know to have tokens before logging them
| 705 | $logHeaders = $req['headers'];
| 706 | foreach ( $logHeaders as $header => $value ) {
| 707 | if ( preg_match( self::SENSITIVE_HEADERS, $header ) === 1 ) {
| 708 | $logHeaders[$header] = '[redacted]'; // only the logged copy is changed; the request keeps the real value
| 709 | }
| 710 | }
| 711 | $this->logger->debug( "HTTP start: {method} {url}",
| 712 | [
| 713 | 'method' => $req['method'],
| 714 | 'url' => $req['url'],
| 715 | 'headers' => $logHeaders,
| 716 | ]
| 717 | );
| 718 | $req['flags'] ??= [];
| 719 | }
| 720 | }
| 721 | |
| 722 | /** |
| 723 | * Point a request at the reverse proxy; the original host moves to the Host header. |
| 724 | */ |
| 725 | private function useReverseProxy( array &$req, string $proxy ) { |
| 726 | $proxyParts = parse_url( $proxy ); |
| 727 | if ( $proxyParts === false ) { |
| 728 | throw new InvalidArgumentException( "Invalid reverseProxy configured: $proxy" ); |
| 729 | } |
| 730 | $urlParts = parse_url( $req['url'] ); |
| 731 | if ( $urlParts === false ) { |
| 732 | throw new InvalidArgumentException( "Invalid url specified: {$req['url']}" ); |
| 733 | } |
| 734 | // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset |
| 735 | $req['headers']['Host'] = $urlParts['host']; |
| 736 | // Swap in the scheme, host and port of the proxy |
| 737 | // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset |
| 738 | $urlParts['scheme'] = $proxyParts['scheme']; |
| 739 | // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset |
| 740 | $urlParts['host'] = $proxyParts['host']; |
| 741 | unset( $urlParts['port'] ); |
| 742 | if ( isset( $proxyParts['port'] ) ) { |
| 743 | $urlParts['port'] = $proxyParts['port']; |
| 744 | } |
| 745 | $req['url'] = self::assembleUrl( $urlParts ); |
| 746 | // false (unlike null, which falls back to $this->proxy) disables any other proxy |
| 747 | $req['proxy'] = false; |
| 748 | } |
| 749 | |
| 750 | /** |
| 751 | * Build a URL string from the components returned by parse_url(). |
| 752 | * |
| 753 | * Derived from MediaWiki\Utils\UrlUtils::assemble, but with the "://" |
| 754 | * delimiter hardcoded. Layout of the result: |
| 755 | * [scheme://][[user][:pass]@][host][:port][path][?query][#fragment] |
| 756 | * |
| 757 | * @param array $urlParts URL parts, as output from parse_url() |
| 758 | * @return string URL assembled from its component parts |
| 759 | */ |
| 760 | private static function assembleUrl( array $urlParts ): string { |
| 761 | $url = ''; |
| 762 | if ( isset( $urlParts['scheme'] ) ) { |
| 763 | $url = $urlParts['scheme'] . '://'; |
| 764 | } |
| 765 | |
| 766 | // User info and port are only emitted when there is a host to attach them to |
| 767 | if ( isset( $urlParts['host'] ) ) { |
| 768 | $userInfo = ''; |
| 769 | if ( isset( $urlParts['user'] ) ) { |
| 770 | $userInfo = $urlParts['user']; |
| 771 | if ( isset( $urlParts['pass'] ) ) { |
| 772 | $userInfo .= ':' . $urlParts['pass']; |
| 773 | } |
| 774 | $userInfo .= '@'; |
| 775 | } |
| 776 | $portSuffix = isset( $urlParts['port'] ) ? ':' . $urlParts['port'] : ''; |
| 777 | $url .= $userInfo . $urlParts['host'] . $portSuffix; |
| 778 | } |
| 779 | |
| 780 | $url .= $urlParts['path'] ?? ''; |
| 781 | |
| 782 | // An empty query string is dropped instead of leaving a bare "?" |
| 783 | if ( ( $urlParts['query'] ?? '' ) !== '' ) { |
| 784 | $url .= '?' . $urlParts['query']; |
| 785 | } |
| 786 | |
| 787 | if ( isset( $urlParts['fragment'] ) ) { |
| 788 | $url .= '#' . $urlParts['fragment']; |
| 789 | } |
| 790 | |
| 791 | return $url; |
| 792 | } |
| 793 | |
| 794 | /** |
| 795 | * Check if the URL can be served by localhost |
| 796 | * |
| 797 | * The URL is local when its host, or any superdomain of it, is listed in |
| 798 | * $this->localVirtualHosts. Only http:// and https:// URLs whose host is |
| 799 | * followed by "/" or ":" are recognised; anything else is reported as not local. |
| 800 | * |
| 801 | * Host names are compared strictly as strings, so numeric-looking names such |
| 802 | * as "1e3" and "1000" are never treated as the same host. |
| 803 | * |
| 804 | * @note this is mostly a copy of MWHttpRequest::isLocalURL() |
| 805 | * @param string $url Full url to check |
| 806 | * @return bool |
| 807 | */ |
| 808 | private function isLocalURL( $url ) { |
| 809 | if ( !$this->localVirtualHosts ) { |
| 810 | // Shortcut |
| 811 | return false; |
| 812 | } |
| 813 | |
| 814 | // Extract host part |
| 815 | $matches = []; |
| 816 | if ( !preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) { |
| 817 | return false; |
| 818 | } |
| 819 | |
| 820 | // Rebuild the host from its last label towards the first, so the domain |
| 821 | // itself and each of its superdomains is checked against the list |
| 822 | $domain = null; |
| 823 | foreach ( array_reverse( explode( '.', $matches[1] ) ) as $domainPart ) { |
| 824 | $domain = $domain === null ? $domainPart : $domainPart . '.' . $domain; |
| 825 | // Compare strictly: a loose in_array() would treat numeric-looking names |
| 826 | // such as "1e3" and "1000" as the same host |
| 827 | if ( in_array( $domain, $this->localVirtualHosts, true ) ) { |
| 828 | return true; |
| 829 | } |
| 830 | } |
| 831 | |
| 832 | return false; |
| 833 | } |
| 834 | |
| 835 | /** |
| 836 | * Pick the select timeout to use for a set of request options. |
| 837 | * |
| 838 | * @param array $opts May hold 'connTimeout' and 'reqTimeout' overrides |
| 839 | * @return float Select timeout; the integer 1 when no non-zero timeout is set |
| 840 | */ |
| 841 | private function getSelectTimeout( $opts ) { |
| 842 | // array_filter() drops falsy entries such as null or 0 |
| 843 | $candidates = array_filter( [ |
| 844 | $opts['connTimeout'] ?? $this->connTimeout, |
| 845 | $opts['reqTimeout'] ?? $this->reqTimeout, |
| 846 | ] ); |
| 847 | if ( !$candidates ) { |
| 848 | return 1; |
| 849 | } |
| 850 | |
| 851 | // Scale the shortest timeout by the accuracy factor |
| 852 | $scaled = min( $candidates ) * self::TIMEOUT_ACCURACY_FACTOR; |
| 853 | // Minimum 10us |
| 854 | return max( $scaled, 10e-6 ); |
| 855 | } |
| 856 | |
| 857 | /** |
| 858 | * Replace the logger this client writes its log messages to |
| 859 | */ |
| 860 | public function setLogger( LoggerInterface $logger ): void { |
| 861 | $this->logger = $logger; |
| 862 | } |
| 863 | |
| 864 | public function __destruct() { |
| 865 | if ( !$this->cmh ) { |
| 866 | return; // no curl multi handle to release |
| 867 | } |
| 868 | curl_multi_close( $this->cmh ); |
| 869 | $this->cmh = null; |
| 870 | } |
| 871 | } |
| 872 | /** @deprecated since 1.43, use the namespaced Wikimedia\Http\MultiHttpClient instead */ |
| 873 | class_alias( MultiHttpClient::class, 'MultiHttpClient' ); |