MediaWiki 1.40.4
MultiHttpClient.php
Go to the documentation of this file.
1<?php
24use Psr\Log\LoggerAwareInterface;
25use Psr\Log\LoggerInterface;
26use Psr\Log\NullLogger;
27
55class MultiHttpClient implements LoggerAwareInterface {
/** Regex applied to lower-cased request header names; matching headers are logged as '[redacted]' */
57 private const SENSITIVE_HEADERS = '/(^|-|_)(authorization|auth|password|cookie)($|-|_)/';
/** @var resource|object|null curl_multi_init() handle, created lazily in getCurlMulti() and closed in __destruct() */
62 protected $cmh = null;
/** @var string|null Path to the CA bundle handed to CURLOPT_CAINFO; null leaves cURL's default in place */
64 protected $caBundlePath;
/** @var float Default connection timeout in seconds (converted to milliseconds for cURL) */
66 protected $connTimeout = 10;
/** @var float Upper bound that runMulti() applies to the 'connTimeout' option */
68 protected $maxConnTimeout = INF;
/** @var float Default total request timeout in seconds (converted to milliseconds for cURL) */
70 protected $reqTimeout = 30;
/** @var float Upper bound that runMulti() applies to the 'reqTimeout' option */
72 protected $maxReqTimeout = INF;
/** @var bool Default for the 'usePipelining' option read in getCurlMulti() */
74 protected $usePipelining = false;
/** @var int Used for CURLMOPT_MAXCONNECTS and as the default for CURLMOPT_MAX_HOST_CONNECTIONS */
76 protected $maxConnsPerHost = 50;
/** @var string|null Default proxy, used when a request carries no 'proxy' field of its own */
78 protected $proxy;
/** @var string|false Reverse proxy URL that local URLs are rewritten to, or false to disable */
80 protected $localProxy = false;
/** @var string[] Domains (including their subdomains) that isLocalURL() treats as local */
82 protected $localVirtualHosts = [];
/** @var string User-Agent header value sent when a request does not set one */
84 protected $userAgent = 'wikimedia/multi-http-client v1.0';
/** @var LoggerInterface Receives debug/warning messages; defaults to a NullLogger in the constructor */
86 protected $logger;
87
88 // In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect
89 // timeouts are periodically polled instead of being accurately respected.
90 // The select timeout is set to the minimum timeout multiplied by this factor.
91 private const TIMEOUT_ACCURACY_FACTOR = 0.1;
92
/**
 * Build a client from an options map.
 *
 * Recognized keys: caBundlePath, connTimeout, maxConnTimeout, reqTimeout,
 * maxReqTimeout, usePipelining, maxConnsPerHost, proxy, userAgent, logger,
 * localProxy and localVirtualHosts. A key that is missing (or null) leaves the
 * corresponding property at its default.
 *
 * @param array $options
 * @throws Exception When 'caBundlePath' is given but the file does not exist
 */
public function __construct( array $options ) {
	$bundle = $options['caBundlePath'] ?? null;
	if ( $bundle !== null ) {
		$this->caBundlePath = $bundle;
		if ( !file_exists( $bundle ) ) {
			throw new Exception( "Cannot find CA bundle: " . $bundle );
		}
	}

	// Options that map one-to-one onto a property of the same name
	static $settable = [
		'connTimeout', 'maxConnTimeout', 'reqTimeout', 'maxReqTimeout',
		'usePipelining', 'maxConnsPerHost', 'proxy', 'userAgent', 'logger',
		'localProxy', 'localVirtualHosts',
	];
	foreach ( $settable as $name ) {
		if ( !isset( $options[$name] ) ) {
			continue;
		}
		$this->$name = $options[$name];
	}

	// Guarantee that logging calls never hit a null logger
	if ( $this->logger === null ) {
		$this->logger = new NullLogger;
	}
}
132
/**
 * Execute a single HTTP(S) request.
 *
 * Convenience wrapper that sends the request through runMulti() as a batch of
 * one and hands back only its 'response' entry.
 *
 * @param array $req Request map, in the form accepted by runMulti()
 * @param array $opts Options, passed through to runMulti()
 * @return array The 'response' map of the request
 */
public function run( array $req, array $opts = [] ) {
	$results = $this->runMulti( [ $req ], $opts );

	return $results[0]['response'];
}
159
/**
 * Execute a set of HTTP(S) requests.
 *
 * Requests are normalized first, the timeouts are filled in from the client
 * defaults and clamped to the configured maxima, and the batch is then handed
 * to the cURL backend when available or to the HttpRequestFactory fallback.
 *
 * @param array[] $reqs List of request maps
 * @param array $opts Options; this method reads 'connTimeout', 'reqTimeout'
 *  and 'httpVersion' ('v1.0', 'v1.1', 'v2' or 'v2.0')
 * @return array[] $reqs with a 'response' entry filled in for each request
 * @throws Exception
 */
public function runMulti( array $reqs, array $opts = [] ) {
	$this->normalizeRequests( $reqs );

	// Caller-supplied timeouts win; otherwise fall back to the client defaults
	$opts += [ 'connTimeout' => $this->connTimeout, 'reqTimeout' => $this->reqTimeout ];

	// Clamp each timeout to its configured ceiling (a falsy ceiling means "no limit")
	$ceilings = [
		'connTimeout' => $this->maxConnTimeout,
		'reqTimeout' => $this->maxReqTimeout,
	];
	foreach ( $ceilings as $key => $ceiling ) {
		if ( $ceiling && $opts[$key] > $ceiling ) {
			$opts[$key] = $ceiling;
		}
	}

	if ( !$this->isCurlEnabled() ) {
		# TODO: Add handling for httpVersion option
		return $this->runMultiHttp( $reqs, $opts );
	}

	// Translate the symbolic HTTP version into the matching cURL constant
	switch ( $opts['httpVersion'] ?? null ) {
		case 'v1.0':
			$curlHttpVersion = CURL_HTTP_VERSION_1_0;
			break;
		case 'v1.1':
			$curlHttpVersion = CURL_HTTP_VERSION_1_1;
			break;
		case 'v2':
		case 'v2.0':
			$curlHttpVersion = CURL_HTTP_VERSION_2_0;
			break;
		default:
			$curlHttpVersion = CURL_HTTP_VERSION_NONE;
	}
	$opts['httpVersion'] = $curlHttpVersion;

	return $this->runMultiCurl( $reqs, $opts );
}
223
/**
 * Determines if the curl extension is available, including its multi interface.
 *
 * @return bool True when the extension is loaded and curl_multi_init() exists
 */
protected function isCurlEnabled() {
	if ( !extension_loaded( 'curl' ) ) {
		return false;
	}

	// Some hosts ship a modified curl with the curl_multi_* functions removed,
	// so the presence of the extension alone is not enough.
	return function_exists( 'curl_multi_init' );
}
234
/**
 * Execute a set of normalized HTTP(S) requests concurrently through curl_multi.
 *
 * Each request gets its own easy handle (see getCurlHandle()), all handles are
 * driven to completion on the shared multi handle, and the outcome of every
 * transfer is written into the request's 'response' map, both under the named
 * keys and under the positional keys 0-4 (code, reason, headers, body, error).
 *
 * @param array[] $reqs Normalized request maps
 * @param array $opts Options, passed on to getCurlMulti(), getSelectTimeout()
 *  and getCurlHandle()
 * @return array[] $reqs with the 'response' entries filled in
 * @throws Exception
 */
private function runMultiCurl( array $reqs, array $opts ) {
	$chm = $this->getCurlMulti( $opts );

	$selectTimeout = $this->getSelectTimeout( $opts );

	// Add all of the required cURL handles...
	$handles = [];
	foreach ( $reqs as $index => &$req ) {
		$handles[$index] = $this->getCurlHandle( $req, $opts );
		curl_multi_add_handle( $chm, $handles[$index] );
	}
	unset( $req ); // don't assign over this by accident

	$infos = [];
	// Execute the cURL handles concurrently...
	$active = null; // handles still being processed
	do {
		// Do any available work...
		do {
			$mrc = curl_multi_exec( $chm, $active );
			// A single exec call may complete several transfers at once, so drain
			// the whole message queue. Reading only one message per call can leave
			// results unread when the loop ends, which would then be misreported
			// below as "no status set".
			while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
				// Note: cast to integer even works on PHP 8.0+ despite the
				// handle being an object not a resource, because CurlHandle
				// has a backwards-compatible cast_object handler.
				$infos[(int)$info['handle']] = $info;
			}
		} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
		// Wait (if possible) for available work...
		if ( $active > 0 && $mrc == CURLM_OK && curl_multi_select( $chm, $selectTimeout ) == -1 ) {
			// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
			usleep( 5000 ); // 5ms
		}
	} while ( $active > 0 && $mrc == CURLM_OK );

	// Remove all of the added cURL handles and check for errors...
	foreach ( $reqs as $index => &$req ) {
		$ch = $handles[$index];
		curl_multi_remove_handle( $chm, $ch );

		if ( isset( $infos[(int)$ch] ) ) {
			$info = $infos[(int)$ch];
			$errno = $info['result'];
			if ( $errno !== 0 ) {
				$req['response']['error'] = "(curl error: $errno)";
				if ( function_exists( 'curl_strerror' ) ) {
					$req['response']['error'] .= " " . curl_strerror( $errno );
				}
				$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
					$req['response']['error'] );
			} else {
				$this->logger->debug(
					"HTTP complete: {method} {url} code={response_code} size={size} " .
					"total={total_time} connect={connect_time}",
					[
						'method' => $req['method'],
						'url' => $req['url'],
						'response_code' => $req['response']['code'],
						'size' => curl_getinfo( $ch, CURLINFO_SIZE_DOWNLOAD ),
						'total_time' => $this->getCurlTime(
							$ch, CURLINFO_TOTAL_TIME, 'CURLINFO_TOTAL_TIME_T'
						),
						'connect_time' => $this->getCurlTime(
							$ch, CURLINFO_CONNECT_TIME, 'CURLINFO_CONNECT_TIME_T'
						),
					]
				);
			}
		} else {
			// No completion message was ever received for this handle
			$req['response']['error'] = "(curl error: no status set)";
		}

		// For convenience with array destructuring
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
		curl_close( $ch );
		// Close any string wrapper file handles
		if ( isset( $req['_closeHandle'] ) ) {
			fclose( $req['_closeHandle'] );
			unset( $req['_closeHandle'] );
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
341
/**
 * Create and configure a cURL easy handle for one normalized request.
 *
 * The handle's header and body callbacks write into $req['response'] (or into
 * $req['stream'] for the body, when set), which is why $req is taken by
 * reference. For a PUT with a string body a php://temp stream is opened and
 * stored in $req['_closeHandle'] so the caller can close it afterwards.
 *
 * @param array &$req Normalized request map; reads 'url', 'query', 'method',
 *  'body', 'headers', 'flags' and optionally 'proxy' and 'stream'
 * @param array $opts Options; reads 'connTimeout' and 'reqTimeout' (seconds)
 *  and 'httpVersion' (a CURL_HTTP_VERSION_* constant)
 * @return resource|object The configured cURL handle
 * @throws Exception For a resource PUT body without a usable Content-Length or
 *  Transfer-Encoding header, a body on a non PUT/POST request, or a header
 *  name containing ':'
 */
protected function getCurlHandle( array &$req, array $opts ) {
	$ch = curl_init();

	curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy );
	curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, intval( $opts['connTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_TIMEOUT_MS, intval( $opts['reqTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
	curl_setopt( $ch, CURLOPT_HEADER, 0 );
	if ( $this->caBundlePath !== null ) {
		curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
		curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
	}
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

	// Append the 'query' map to the URL, respecting an existing query string
	$url = $req['url'];
	$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
	if ( $query != '' ) {
		$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
	}
	curl_setopt( $ch, CURLOPT_URL, $url );
	curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
	curl_setopt( $ch, CURLOPT_NOBODY, ( $req['method'] === 'HEAD' ) );
	curl_setopt( $ch, CURLOPT_HTTP_VERSION, $opts['httpVersion'] ?? CURL_HTTP_VERSION_NONE );

	if ( $req['method'] === 'PUT' ) {
		curl_setopt( $ch, CURLOPT_PUT, 1 );
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) ) {
			curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
			$transferEncoding = $req['headers']['transfer-encoding'] ?? null;
			if ( isset( $req['headers']['content-length'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
			} elseif ( $transferEncoding === 'chunked' || $transferEncoding === 'chunks' ) {
				// 'chunked' is the actual HTTP transfer coding name; the historic
				// misspelling 'chunks' is still accepted for backwards compatibility.
				curl_setopt( $ch, CURLOPT_UPLOAD, true );
			} else {
				throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
			}
		} elseif ( $req['body'] !== '' ) {
			// Wrap the string body in a temp stream so cURL can read it like a file
			$fp = fopen( "php://temp", "wb+" );
			fwrite( $fp, $req['body'], strlen( $req['body'] ) );
			rewind( $fp );
			curl_setopt( $ch, CURLOPT_INFILE, $fp );
			curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
			$req['_closeHandle'] = $fp; // remember to close this later
		} else {
			curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
		}
		curl_setopt( $ch, CURLOPT_READFUNCTION,
			static function ( $ch, $fd, $length ) {
				return (string)fread( $fd, $length );
			}
		);
	} elseif ( $req['method'] === 'POST' ) {
		curl_setopt( $ch, CURLOPT_POST, 1 );
		curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
	} else {
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
			throw new Exception( "HTTP body specified for a non PUT/POST request." );
		}
		$req['headers']['content-length'] = 0;
	}

	if ( !isset( $req['headers']['user-agent'] ) ) {
		$req['headers']['user-agent'] = $this->userAgent;
	}

	$headers = [];
	foreach ( $req['headers'] as $name => $value ) {
		// Reject any colon in the name. The previous truthiness test on
		// strpos( $name, ': ' ) missed a match at offset 0 and colons that
		// are not followed by a space.
		if ( strpos( (string)$name, ':' ) !== false ) {
			throw new Exception( "Headers cannot have ':' in the name." );
		}
		$headers[] = $name . ': ' . trim( $value );
	}
	curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

	curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
		static function ( $ch, $header ) use ( &$req ) {
			if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) {
				header( $header );
			}
			$length = strlen( $header );
			$matches = [];
			if ( preg_match( "/^(HTTP\/(?:1\.[01]|2)) (\d{3}) (.*)/", $header, $matches ) ) {
				$req['response']['code'] = (int)$matches[2];
				$req['response']['reason'] = trim( $matches[3] );
				// After a redirect we will receive this again, but we already stored headers
				// that belonged to a redirect response. Start over.
				$req['response']['headers'] = [];
				return $length;
			}
			if ( strpos( $header, ":" ) === false ) {
				return $length;
			}
			[ $name, $value ] = explode( ":", $header, 2 );
			$name = strtolower( $name );
			$value = trim( $value );
			// Repeated headers are folded into one comma-separated value
			if ( isset( $req['response']['headers'][$name] ) ) {
				$req['response']['headers'][$name] .= ', ' . $value;
			} else {
				$req['response']['headers'][$name] = $value;
			}
			return $length;
		}
	);

	// This works with both file and php://temp handles (unlike CURLOPT_FILE)
	$hasOutputStream = isset( $req['stream'] );
	curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
		static function ( $ch, $data ) use ( &$req, $hasOutputStream ) {
			if ( $hasOutputStream ) {
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				return fwrite( $req['stream'], $data );
			} else {
				// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$req['response']['body'] .= $data;

				return strlen( $data );
			}
		}
	);

	return $ch;
}
480
/**
 * Get the shared curl_multi handle, creating it on first use, and apply the
 * per-call connection limit and pipelining settings to it.
 *
 * @param array $opts Options; reads 'maxConnsPerHost' and 'usePipelining',
 *  falling back to the client defaults
 * @return resource|object The curl_multi handle
 */
protected function getCurlMulti( array $opts ) {
	if ( !$this->cmh ) {
		$multi = curl_multi_init();
		// Limit the size of the idle connection cache such that consecutive parallel
		// request batches to the same host can avoid having to keep making connections
		curl_multi_setopt( $multi, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
		$this->cmh = $multi;
	}

	$libVersion = curl_version()['version'];

	// CURLMOPT_MAX_HOST_CONNECTIONS is available since PHP 7.0.7 and cURL 7.30.0
	if ( version_compare( $libVersion, '7.30.0', '>=' ) ) {
		// Limit the number of in-flight requests for any given host
		$perHostLimit = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;
		curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$perHostLimit );
	}

	$wantsPipelining = $opts['usePipelining'] ?? $this->usePipelining;
	if ( !$wantsPipelining ) {
		return $this->cmh;
	}

	if ( version_compare( $libVersion, '7.62', '>=' ) ) {
		// The option is a bitfield but HTTP/1.x pipelining has been removed
		$mode = CURLPIPE_MULTIPLEX;
	} elseif ( version_compare( $libVersion, '7.43', '>=' ) ) {
		// The option is a bitfield and HTTP/1.x pipelining is supported
		$mode = CURLPIPE_HTTP1 | CURLPIPE_MULTIPLEX;
	} else {
		// The option is a boolean
		$mode = 1;
	}
	// Suppress deprecation, we know already (T264735)
	// phpcs:ignore Generic.PHP.NoSilencedErrors
	@curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, $mode );

	return $this->cmh;
}
523
/**
 * Read a timing value from a cURL handle as a string of seconds.
 *
 * When the constant named by $newConstName is defined, its value (reported in
 * microseconds) is read and formatted with six decimals; otherwise the value
 * of $oldOption is returned as-is, cast to string.
 *
 * @param resource|object $ch cURL handle
 * @param int $oldOption CURLINFO_* option used as the fallback
 * @param string $newConstName Name of the preferred CURLINFO_*_T constant
 * @return string
 */
private function getCurlTime( $ch, $oldOption, $newConstName ): string {
	if ( !defined( $newConstName ) ) {
		return (string)curl_getinfo( $ch, $oldOption );
	}

	$microseconds = curl_getinfo( $ch, constant( $newConstName ) );

	return sprintf( "%.6F", $microseconds / 1e6 );
}
541
/**
 * Execute a set of normalized HTTP(S) requests one after another through
 * HttpRequestFactory. Used by runMulti() when the cURL backend is unavailable.
 *
 * @param array[] $reqs Normalized request maps
 * @param array $opts Options; reads 'reqTimeout' and 'connTimeout', falling
 *  back to the client defaults
 * @return array[] $reqs with the 'response' entries filled in
 */
private function runMultiHttp( array $reqs, array $opts = [] ) {
	// Settings shared by every request in the batch
	$sharedOptions = [
		'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
		'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout,
		'logger' => $this->logger,
		'caInfo' => $this->caBundlePath,
	];

	foreach ( $reqs as &$req ) {
		$reqOptions = $sharedOptions + [
			'method' => $req['method'],
			'proxy' => $req['proxy'] ?? $this->proxy,
			'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent,
			'postData' => $req['body'],
		];

		// Append the 'query' map to the URL, respecting an existing query string
		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query != '' ) {
			$separator = strpos( $req['url'], '?' ) === false ? '?' : '&';
			$url .= $separator . $query;
		}

		$httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
			$url, $reqOptions, __METHOD__ );
		$httpRequest->setLogger( $this->logger );
		$sv = $httpRequest->execute()->getStatusValue();

		// Each header arrives as a list of values; fold it into one string
		$respHeaders = array_map(
			static fn ( $values ) => implode( ', ', $values ),
			$httpRequest->getResponseHeaders()
		);

		$req['response'] = [
			'code' => $httpRequest->getStatus(),
			'reason' => '',
			'headers' => $respHeaders,
			'body' => $httpRequest->getContent(),
			'error' => '',
		];

		$firstError = $sv->isOK() ? null : ( $sv->getErrors()[0] ?? null );
		if ( $firstError !== null ) {
			$req['response']['error'] = $firstError['message'];

			// param values vary per failure type (ex. unknown host vs unknown page)
			if ( isset( $firstError['params'][0] ) ) {
				if ( !is_numeric( $firstError['params'][0] ) ) {
					$req['response']['reason'] = $firstError['params'][0];
				} elseif ( isset( $firstError['params'][1] ) ) {
					// @phan-suppress-next-line PhanTypeInvalidDimOffset
					$req['response']['reason'] = $firstError['params'][1];
				}
			}
		}

		// Positional aliases, for convenience with array destructuring
		foreach ( [ 'code', 'reason', 'headers', 'body', 'error' ] as $position => $field ) {
			$req['response'][$position] = $req['response'][$field];
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
625
/**
 * Normalize request maps in place so the backends can rely on their shape.
 *
 * For every request this resets 'response', expands the short form
 * (0 => method, 1 => url), optionally reroutes local URLs through the
 * configured reverse proxy, defaults 'query', 'body' and 'flags', lower-cases
 * header names, and logs the start of the request with sensitive header
 * values redacted.
 *
 * @param array[] &$reqs Request maps to normalize
 * @throws Exception When a request has no method or no URL
 */
private function normalizeRequests( array &$reqs ) {
	foreach ( $reqs as &$req ) {
		$req['response'] = [
			'code' => 0,
			'reason' => '',
			'headers' => [],
			'body' => '',
			'error' => ''
		];

		// Expand the short form: [ 'GET', 'http://...' ]
		foreach ( [ 0 => 'method', 1 => 'url' ] as $position => $field ) {
			if ( isset( $req[$position] ) ) {
				$req[$field] = $req[$position];
				unset( $req[$position] );
			}
		}

		if ( !isset( $req['method'] ) ) {
			throw new Exception( "Request has no 'method' field set." );
		}
		if ( !isset( $req['url'] ) ) {
			throw new Exception( "Request has no 'url' field set." );
		}

		if ( $this->localProxy !== false && $this->isLocalURL( $req['url'] ) ) {
			$this->useReverseProxy( $req, $this->localProxy );
		}

		if ( !isset( $req['query'] ) ) {
			$req['query'] = [];
		}

		// Header names are case-insensitive; store them lower-cased
		$normalized = [];
		foreach ( $req['headers'] ?? [] as $name => $value ) {
			$normalized[strtolower( $name )] = $value;
		}
		$req['headers'] = $normalized;

		if ( !isset( $req['body'] ) ) {
			$req['body'] = '';
			$req['headers']['content-length'] = 0;
		}

		// Redact some headers we know to have tokens before logging them
		$logHeaders = $req['headers'];
		foreach ( array_keys( $logHeaders ) as $name ) {
			if ( preg_match( self::SENSITIVE_HEADERS, $name ) === 1 ) {
				$logHeaders[$name] = '[redacted]';
			}
		}
		$this->logger->debug( "HTTP start: {method} {url}",
			[
				'method' => $req['method'],
				'url' => $req['url'],
				'headers' => $logHeaders,
			]
		);

		if ( !isset( $req['flags'] ) ) {
			$req['flags'] = [];
		}
	}
	unset( $req ); // don't assign over this by accident
}
685
/**
 * Reroute a request through a reverse proxy.
 *
 * The original host is preserved in a 'Host' header while scheme, host and
 * port of the request URL are replaced by those of the proxy. The request's
 * 'proxy' field is set to false so that no forward proxy is used on top.
 *
 * @param array &$req Request map, modified in place
 * @param string $proxy Reverse proxy URL
 * @throws Exception When either the proxy URL or the request URL cannot be parsed
 */
private function useReverseProxy( array &$req, $proxy ) {
	$proxyParts = wfParseUrl( $proxy );
	if ( $proxyParts === false ) {
		throw new Exception( "Invalid reverseProxy configured: $proxy" );
	}

	$target = wfParseUrl( $req['url'] );
	if ( $target === false ) {
		throw new Exception( "Invalid url specified: {$req['url']}" );
	}

	// Set the current host in the Host header
	$req['headers']['Host'] = $target['host'];

	// Replace scheme, host and port in the request
	$target['scheme'] = $proxyParts['scheme'];
	$target['host'] = $proxyParts['host'];
	if ( !isset( $proxyParts['port'] ) ) {
		unset( $target['port'] );
	} else {
		$target['port'] = $proxyParts['port'];
	}
	$req['url'] = wfAssembleUrl( $target );

	// Explicitly disable use of another proxy by setting to false,
	// since null will fallback to $this->proxy
	$req['proxy'] = false;
}
710
/**
 * Check whether a URL points at one of the configured local virtual hosts.
 *
 * The host of an http(s) URL is compared, together with each of its parent
 * domains, against $this->localVirtualHosts.
 *
 * @param string $url
 * @return bool True when the host or one of its parent domains is listed
 */
private function isLocalURL( $url ) {
	if ( !$this->localVirtualHosts ) {
		// Shortcut
		return false;
	}

	// Extract host part. The host may be followed by a port, path, query or
	// fragment, or by nothing at all (e.g. "http://localhost").
	$matches = [];
	if ( !preg_match( '!^https?://([\w.-]+)(?:[/:?#]|$)!', $url, $matches ) ) {
		return false;
	}

	// Walk from the top-level label towards the full host name, so that the
	// domain itself and every superdomain gets checked
	$labels = array_reverse( explode( '.', $matches[1] ) );
	$domain = '';
	foreach ( $labels as $i => $label ) {
		$domain = $i === 0 ? $label : $label . '.' . $domain;

		// Strict comparison: a loose in_array() compares numeric-looking strings
		// numerically, so e.g. "0.1" would match a configured "0.10"
		if ( in_array( $domain, $this->localVirtualHosts, true ) ) {
			return true;
		}
	}

	return false;
}
751
/**
 * Work out how long curl_multi_select() may block per iteration.
 *
 * The result is the smallest non-zero timeout (connection or request) scaled
 * by TIMEOUT_ACCURACY_FACTOR, with a floor of 10 microseconds. When both
 * timeouts are zero/empty, 1 is returned.
 *
 * @param array $opts Options; reads 'connTimeout' and 'reqTimeout', falling
 *  back to the client defaults
 * @return float|int Timeout in seconds
 */
private function getSelectTimeout( $opts ) {
	$candidates = [
		$opts['connTimeout'] ?? $this->connTimeout,
		$opts['reqTimeout'] ?? $this->reqTimeout,
	];

	// Zero/empty timeouts are dropped and so do not take part in the minimum
	$effective = array_filter( $candidates );
	if ( !$effective ) {
		return 1;
	}

	$scaled = min( $effective ) * self::TIMEOUT_ACCURACY_FACTOR;

	// Minimum 10us
	return $scaled < 10e-6 ? 10e-6 : $scaled;
}
773
/**
 * Register a logger, replacing the one currently in use.
 *
 * @param LoggerInterface $logger
 */
779 public function setLogger( LoggerInterface $logger ) {
780 $this->logger = $logger;
781 }
782
/**
 * Release the curl_multi handle, if one was ever created.
 */
public function __destruct() {
	if ( !$this->cmh ) {
		return;
	}

	curl_multi_close( $this->cmh );
	$this->cmh = null;
}
789}
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfAssembleUrl( $urlParts)
This function will reassemble a URL parsed with wfParseURL.
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:88
Service locator for MediaWiki core services.
Class to handle multiple HTTP requests.
string[] $localVirtualHosts
runMulti(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests.
__construct(array $options)
Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get a client object with ap...
string|null $proxy
proxy
string|null $caBundlePath
SSL certificates path.
LoggerInterface $logger
resource|object|null $cmh
@phpcs:ignore MediaWiki.Commenting.PropertyDocumentation.ObjectTypeHintVar curl_multi_init() handle,...
setLogger(LoggerInterface $logger)
Register a logger.
run(array $req, array $opts=[])
Execute an HTTP(S) request.
string|false $localProxy
getCurlHandle(array &$req, array $opts)
isCurlEnabled()
Determines if the curl extension is available.
getCurlMulti(array $opts)
$header