MediaWiki  master
MultiHttpClient.php
Go to the documentation of this file.
1 <?php
24 use Psr\Log\LoggerAwareInterface;
25 use Psr\Log\LoggerInterface;
26 use Psr\Log\NullLogger;
27 
55 class MultiHttpClient implements LoggerAwareInterface {
/**
 * Pattern matched against lower-cased request header names; matching
 * headers are replaced with "[redacted]" before being passed to the logger.
 */
private const SENSITIVE_HEADERS = '/(^|-|_)(authorization|auth|password|cookie)($|-|_)/';

/**
 * In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect
 * timeouts are periodically polled instead of being accurately respected.
 * The select timeout is set to the minimum timeout multiplied by this factor.
 */
private const TIMEOUT_ACCURACY_FACTOR = 0.1;

/** @var resource|object|null curl_multi_init() handle, created lazily by getCurlMulti() */
protected $cmh;

/** @var string|null SSL certificates path */
protected $caBundlePath;

/** @var float Default connection timeout, in seconds */
protected $connTimeout = 10;

/** @var float Upper bound applied to any requested connection timeout */
protected $maxConnTimeout = INF;

/** @var float Default total request timeout, in seconds */
protected $reqTimeout = 30;

/** @var float Upper bound applied to any requested request timeout */
protected $maxReqTimeout = INF;

/** @var bool Default for the 'usePipelining' batch option */
protected $usePipelining = false;

/** @var int Default for the 'maxConnsPerHost' batch option */
protected $maxConnsPerHost = 50;

/** @var string|null Proxy used when a request does not name its own */
protected $proxy;

/** @var string|false Reverse proxy URL for requests to local virtual hosts, or false */
protected $localProxy = false;

/** @var string[] Domains (including their sub-domains) treated as local virtual hosts */
protected $localVirtualHosts = [];

/** @var string User-Agent sent when a request does not set one */
protected $userAgent = 'wikimedia/multi-http-client v1.0';

/** @var LoggerInterface */
protected $logger;
92 
/**
 * Create a client, overriding the built-in defaults with any recognised option.
 *
 * Recognised keys: caBundlePath, connTimeout, maxConnTimeout, reqTimeout,
 * maxReqTimeout, usePipelining, maxConnsPerHost, proxy, userAgent, logger,
 * localProxy and localVirtualHosts. Keys holding null are ignored.
 *
 * @param array $options
 * @throws Exception If 'caBundlePath' is given but the file does not exist
 */
public function __construct( array $options ) {
	$bundle = $options['caBundlePath'] ?? null;
	if ( $bundle !== null ) {
		$this->caBundlePath = $bundle;
		if ( !file_exists( $bundle ) ) {
			throw new Exception( "Cannot find CA bundle: " . $bundle );
		}
	}

	// Each of these option names maps directly onto a property of the same name
	$assignable = [
		'connTimeout', 'maxConnTimeout', 'reqTimeout', 'maxReqTimeout',
		'usePipelining', 'maxConnsPerHost', 'proxy', 'userAgent', 'logger',
		'localProxy', 'localVirtualHosts',
	];
	foreach ( array_intersect_key( $options, array_flip( $assignable ) ) as $property => $value ) {
		if ( $value !== null ) {
			$this->$property = $value;
		}
	}

	// Always have a logger so the rest of the class can log unconditionally
	if ( $this->logger === null ) {
		$this->logger = new NullLogger;
	}
}
132 
/**
 * Execute a single HTTP(S) request.
 *
 * Thin wrapper around runMulti() for the one-request case.
 *
 * @param array $req Request map, in the form accepted by runMulti()
 * @param array $opts Batch options, in the form accepted by runMulti()
 * @return array The 'response' map of the request
 */
public function run( array $req, array $opts = [] ) {
	$completed = $this->runMulti( [ $req ], $opts );

	return $completed[0]['response'];
}
159 
/**
 * Execute a set of HTTP(S) requests.
 *
 * The requests are normalized, the timeouts are filled in from the client
 * defaults and capped to the configured maximums, and the batch is handed
 * to the cURL backend when available, otherwise to the fallback backend.
 *
 * @param array[] $reqs List of request maps
 * @param array $opts Batch options; 'connTimeout', 'reqTimeout' and
 *   'httpVersion' ("v1.0", "v1.1", "v2" or "v2.0") are handled here, the
 *   rest is passed through to the backend
 * @return array[] The request maps, each with a 'response' field added
 * @throws Exception
 */
public function runMulti( array $reqs, array $opts = [] ) {
	$this->normalizeRequests( $reqs );

	// Caller-supplied timeouts win; otherwise use the client defaults
	$opts = $opts + [
		'connTimeout' => $this->connTimeout,
		'reqTimeout' => $this->reqTimeout,
	];

	// Clamp each timeout to its configured ceiling (a falsy ceiling means "no limit")
	$ceilings = [
		'connTimeout' => $this->maxConnTimeout,
		'reqTimeout' => $this->maxReqTimeout,
	];
	foreach ( $ceilings as $key => $ceiling ) {
		if ( $ceiling && $opts[$key] > $ceiling ) {
			$opts[$key] = $ceiling;
		}
	}

	if ( !$this->isCurlEnabled() ) {
		# TODO: Add handling for httpVersion option
		return $this->runMultiHttp( $reqs, $opts );
	}

	// Translate the symbolic HTTP version into the matching cURL constant
	$requested = $opts['httpVersion'] ?? null;
	if ( $requested == 'v1.0' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_1_0;
	} elseif ( $requested == 'v1.1' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_1_1;
	} elseif ( $requested == 'v2' || $requested == 'v2.0' ) {
		$opts['httpVersion'] = CURL_HTTP_VERSION_2_0;
	} else {
		$opts['httpVersion'] = CURL_HTTP_VERSION_NONE;
	}

	return $this->runMultiCurl( $reqs, $opts );
}
223 
/**
 * Determines if the curl extension, including its multi interface, is available.
 *
 * @return bool
 */
protected function isCurlEnabled() {
	if ( !extension_loaded( 'curl' ) ) {
		return false;
	}

	// Explicitly test if curl_multi* is blocked, as some users' hosts provide
	// them with a modified curl with the multi-threaded parts removed(!)
	return function_exists( 'curl_multi_init' );
}
234 
/**
 * Execute a set of normalized HTTP(S) requests concurrently using cURL.
 *
 * @param array[] $reqs Request maps, already passed through normalizeRequests()
 * @param array $opts Batch options with 'connTimeout', 'reqTimeout' and
 *   'httpVersion' already resolved by runMulti()
 * @return array[] The request maps with their 'response' field filled in,
 *   both under named keys and under numeric keys 0-4
 *   (code, reason, headers, body, error)
 * @throws Exception If a request cannot be turned into a cURL handle
 */
private function runMultiCurl( array $reqs, array $opts ) {
	$chm = $this->getCurlMulti( $opts );

	$selectTimeout = $this->getSelectTimeout( $opts );

	// Build all of the required cURL handles first. getCurlHandle() may throw
	// for a malformed request; nothing must be attached to the shared multi
	// handle at that point, or the stale handles would leak into the next batch.
	$handles = [];
	foreach ( $reqs as $index => &$req ) {
		$handles[$index] = $this->getCurlHandle( $req, $opts );
	}
	unset( $req ); // don't assign over this by accident

	foreach ( $handles as $handle ) {
		curl_multi_add_handle( $chm, $handle );
	}

	$infos = [];
	// Execute the cURL handles concurrently...
	$active = null; // handles still being processed
	do {
		// Do any available work...
		do {
			$mrc = curl_multi_exec( $chm, $active );
			// A single curl_multi_exec() call can complete several transfers,
			// and each curl_multi_info_read() call returns only one message.
			// Drain the queue, otherwise finished requests would be reported
			// as "no status set".
			while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
				// Note: cast to integer even works on PHP 8.0+ despite the
				// handle being an object not a resource, because CurlHandle
				// has a backwards-compatible cast_object handler.
				$infos[(int)$info['handle']] = $info;
			}
		} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
		// Wait (if possible) for available work...
		if ( $active > 0 && $mrc == CURLM_OK && curl_multi_select( $chm, $selectTimeout ) == -1 ) {
			// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
			usleep( 5000 ); // 5ms
		}
	} while ( $active > 0 && $mrc == CURLM_OK );

	// Remove all of the added cURL handles and check for errors...
	foreach ( $reqs as $index => &$req ) {
		$ch = $handles[$index];
		curl_multi_remove_handle( $chm, $ch );

		if ( isset( $infos[(int)$ch] ) ) {
			$info = $infos[(int)$ch];
			$errno = $info['result'];
			if ( $errno !== 0 ) {
				$req['response']['error'] = "(curl error: $errno)";
				if ( function_exists( 'curl_strerror' ) ) {
					$req['response']['error'] .= " " . curl_strerror( $errno );
				}
				$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
					$req['response']['error'] );
			} else {
				$this->logger->debug(
					"HTTP complete: {method} {url} code={response_code} size={size} " .
					"total={total_time} connect={connect_time}",
					[
						'method' => $req['method'],
						'url' => $req['url'],
						'response_code' => $req['response']['code'],
						'size' => curl_getinfo( $ch, CURLINFO_SIZE_DOWNLOAD ),
						'total_time' => $this->getCurlTime(
							$ch, CURLINFO_TOTAL_TIME, 'CURLINFO_TOTAL_TIME_T'
						),
						'connect_time' => $this->getCurlTime(
							$ch, CURLINFO_CONNECT_TIME, 'CURLINFO_CONNECT_TIME_T'
						),
					]
				);
			}
		} else {
			// The multi loop ended without cURL ever reporting on this handle
			$req['response']['error'] = "(curl error: no status set)";
		}

		// For convenience with array destructuring
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
		curl_close( $ch );
		// Close any string wrapper file handles
		if ( isset( $req['_closeHandle'] ) ) {
			fclose( $req['_closeHandle'] );
			unset( $req['_closeHandle'] );
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
341 
/**
 * Build a cURL easy handle for a single normalized request.
 *
 * The handle's header and write callbacks hold a reference to $req and fill
 * in $req['response'] while the transfer runs. This method may also add
 * 'user-agent' and 'content-length' headers to $req, and stores a temporary
 * stream in $req['_closeHandle'] when a string PUT body has to be wrapped.
 *
 * @param array &$req Normalized request map; modified as described above
 * @param array $opts Batch options; 'connTimeout' and 'reqTimeout' (seconds)
 *   are required, 'httpVersion' (a CURL_HTTP_VERSION_* value) is optional
 * @return resource|object cURL easy handle
 * @throws Exception If a streamed PUT body has neither a Content-Length nor
 *   a chunked Transfer-Encoding header, if a body is given for a method
 *   other than PUT/POST, or if a header name contains a colon
 */
protected function getCurlHandle( array &$req, array $opts ) {
	$ch = curl_init();

	curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy );
	// Timeouts are given in (possibly fractional) seconds; cURL wants milliseconds
	curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, intval( $opts['connTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_TIMEOUT_MS, intval( $opts['reqTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
	curl_setopt( $ch, CURLOPT_HEADER, 0 );
	if ( $this->caBundlePath !== null ) {
		curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
		curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
	}
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

	// Append the 'query' parameters to whatever query string the URL already has
	$url = $req['url'];
	$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
	if ( $query != '' ) {
		$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
	}
	curl_setopt( $ch, CURLOPT_URL, $url );
	curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
	curl_setopt( $ch, CURLOPT_NOBODY, ( $req['method'] === 'HEAD' ) );
	curl_setopt( $ch, CURLOPT_HTTP_VERSION, $opts['httpVersion'] ?? CURL_HTTP_VERSION_NONE );

	if ( $req['method'] === 'PUT' ) {
		curl_setopt( $ch, CURLOPT_PUT, 1 );
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) ) {
			curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
			if ( isset( $req['headers']['content-length'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
			} elseif ( isset( $req['headers']['transfer-encoding'] ) &&
				// "chunked" is the actual HTTP transfer coding name; the
				// historical spelling "chunks" is still accepted so that
				// existing callers keep working.
				in_array( $req['headers']['transfer-encoding'], [ 'chunked', 'chunks' ], true )
			) {
				curl_setopt( $ch, CURLOPT_UPLOAD, true );
			} else {
				throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
			}
		} elseif ( $req['body'] !== '' ) {
			// Wrap the string body in a temporary stream that cURL can read from
			$fp = fopen( "php://temp", "wb+" );
			fwrite( $fp, $req['body'], strlen( $req['body'] ) );
			rewind( $fp );
			curl_setopt( $ch, CURLOPT_INFILE, $fp );
			curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
			$req['_closeHandle'] = $fp; // remember to close this later
		} else {
			curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
		}
		curl_setopt( $ch, CURLOPT_READFUNCTION,
			static function ( $ch, $fd, $length ) {
				return (string)fread( $fd, $length );
			}
		);
	} elseif ( $req['method'] === 'POST' ) {
		curl_setopt( $ch, CURLOPT_POST, 1 );
		curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
	} else {
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
			throw new Exception( "HTTP body specified for a non PUT/POST request." );
		}
		$req['headers']['content-length'] = 0;
	}

	if ( !isset( $req['headers']['user-agent'] ) ) {
		$req['headers']['user-agent'] = $this->userAgent;
	}

	$headers = [];
	foreach ( $req['headers'] as $name => $value ) {
		// Reject any colon in the name, wherever it appears: the header line
		// is built as "name: value", so a colon in the name would change
		// which part is sent as the field name.
		if ( strpos( (string)$name, ':' ) !== false ) {
			throw new Exception( "Headers cannot have ':' in the name." );
		}
		$headers[] = $name . ': ' . trim( $value );
	}
	curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

	curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
		static function ( $ch, $header ) use ( &$req ) {
			if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) {
				header( $header );
			}
			$length = strlen( $header );
			$matches = [];
			if ( preg_match( "/^(HTTP\/(?:1\.[01]|2)) (\d{3}) (.*)/", $header, $matches ) ) {
				$req['response']['code'] = (int)$matches[2];
				$req['response']['reason'] = trim( $matches[3] );
				// After a redirect we will receive this again, but we already stored headers
				// that belonged to a redirect response. Start over.
				$req['response']['headers'] = [];
				return $length;
			}
			if ( strpos( $header, ":" ) === false ) {
				return $length;
			}
			[ $name, $value ] = explode( ":", $header, 2 );
			$name = strtolower( $name );
			$value = trim( $value );
			// Repeated response headers are folded into one comma-separated value
			if ( isset( $req['response']['headers'][$name] ) ) {
				$req['response']['headers'][$name] .= ', ' . $value;
			} else {
				$req['response']['headers'][$name] = $value;
			}
			return $length;
		}
	);

	// This works with both file and php://temp handles (unlike CURLOPT_FILE)
	$hasOutputStream = isset( $req['stream'] );
	curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
		static function ( $ch, $data ) use ( &$req, $hasOutputStream ) {
			if ( $hasOutputStream ) {
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				return fwrite( $req['stream'], $data );
			} else {
				// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$req['response']['body'] .= $data;

				return strlen( $data );
			}
		}
	);

	return $ch;
}
480 
/**
 * Get the shared cURL multi handle, creating it on first use, and apply the
 * per-batch connection and pipelining settings to it.
 *
 * @param array $opts Batch options; 'maxConnsPerHost' and 'usePipelining'
 *   override the client defaults when present
 * @return resource|object curl_multi_init() handle
 */
protected function getCurlMulti( array $opts ) {
	if ( !$this->cmh ) {
		$this->cmh = curl_multi_init();
		// Limit the size of the idle connection cache such that consecutive parallel
		// request batches to the same host can avoid having to keep making connections
		curl_multi_setopt( $this->cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
	}

	$libVersion = curl_version()['version'];

	// CURLMOPT_MAX_HOST_CONNECTIONS is available since PHP 7.0.7 and cURL 7.30.0
	if ( version_compare( $libVersion, '7.30.0', '>=' ) ) {
		// Limit the number of in-flight requests for any given host
		$perHostLimit = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;
		curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$perHostLimit );
	}

	$wantPipelining = $opts['usePipelining'] ?? $this->usePipelining;
	if ( !$wantPipelining ) {
		return $this->cmh;
	}

	if ( version_compare( $libVersion, '7.62', '>=' ) ) {
		// The option is a bitfield but HTTP/1.x pipelining has been removed
		$mode = CURLPIPE_MULTIPLEX;
	} elseif ( version_compare( $libVersion, '7.43', '>=' ) ) {
		// The option is a bitfield and HTTP/1.x pipelining is supported
		$mode = CURLPIPE_HTTP1 | CURLPIPE_MULTIPLEX;
	} else {
		// The option is a boolean
		$mode = 1;
	}
	// Suppress deprecation, we know already (T264735)
	// phpcs:ignore Generic.PHP.NoSilencedErrors
	@curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, $mode );

	return $this->cmh;
}
523 
/**
 * Read a timing value from a cURL handle as a string.
 *
 * When the constant named by $newConstName is defined, its value is read
 * and divided by 1e6 before being formatted with six decimals; otherwise
 * the value of $oldOption is returned as-is.
 *
 * @param resource|object $ch cURL easy handle
 * @param int $oldOption CURLINFO_* option used as the fallback
 * @param string $newConstName Name of the preferred CURLINFO_*_T constant
 * @return string
 */
private function getCurlTime( $ch, $oldOption, $newConstName ): string {
	if ( !defined( $newConstName ) ) {
		return (string)curl_getinfo( $ch, $oldOption );
	}

	$rawValue = curl_getinfo( $ch, constant( $newConstName ) );

	return sprintf( "%.6F", $rawValue / 1e6 );
}
541 
/**
 * Execute a set of normalized HTTP(S) requests one after another through
 * the HttpRequestFactory service. Used when cURL is not available.
 *
 * @param array[] $reqs Request maps, already passed through normalizeRequests()
 * @param array $opts Batch options; 'reqTimeout' and 'connTimeout' are used
 * @return array[] The request maps with their 'response' field filled in,
 *   both under named keys and under numeric keys 0-4
 *   (code, reason, headers, body, error)
 */
private function runMultiHttp( array $reqs, array $opts = [] ) {
	$timeout = $opts['reqTimeout'] ?? $this->reqTimeout;
	$connectTimeout = $opts['connTimeout'] ?? $this->connTimeout;

	foreach ( $reqs as &$req ) {
		$requestOptions = [
			'timeout' => $timeout,
			'connectTimeout' => $connectTimeout,
			'logger' => $this->logger,
			'caInfo' => $this->caBundlePath,
			'method' => $req['method'],
			'proxy' => $req['proxy'] ?? $this->proxy,
			'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent,
			'postData' => $req['body'],
		];

		// Append the 'query' parameters to whatever query string the URL already has
		$target = $req['url'];
		$queryString = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $queryString != '' ) {
			$separator = strpos( $req['url'], '?' ) === false ? '?' : '&';
			$target .= $separator . $queryString;
		}

		$httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
			$target, $requestOptions, __METHOD__ );
		$httpRequest->setLogger( $this->logger );
		$status = $httpRequest->execute()->getStatusValue();

		// Each response header arrives as a list of values; join them into one string
		$flatHeaders = [];
		foreach ( $httpRequest->getResponseHeaders() as $headerName => $headerValues ) {
			$flatHeaders[$headerName] = implode( ', ', $headerValues );
		}

		$req['response'] = [
			'code' => $httpRequest->getStatus(),
			'reason' => '',
			'headers' => $flatHeaders,
			'body' => $httpRequest->getContent(),
			'error' => '',
		];

		if ( !$status->isOK() ) {
			$firstError = $status->getErrors()[0] ?? null;
			if ( $firstError !== null ) {
				$req['response']['error'] = $firstError['message'];

				// param values vary per failure type (ex. unknown host vs unknown page)
				$params = $firstError['params'] ?? [];
				if ( isset( $params[0] ) ) {
					if ( !is_numeric( $params[0] ) ) {
						$req['response']['reason'] = $params[0];
					} elseif ( isset( $params[1] ) ) {
						$req['response']['reason'] = $params[1];
					}
				}
			}
		}

		// Numeric aliases, for convenience with array destructuring
		foreach ( [ 'code', 'reason', 'headers', 'body', 'error' ] as $position => $field ) {
			$req['response'][$position] = $req['response'][$field];
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
625 
/**
 * Normalize request maps in place.
 *
 * For each request this resets 'response' to an empty response, expands the
 * short form (index 0 = method, index 1 = URL), routes requests for local
 * virtual hosts through the configured reverse proxy, lower-cases header
 * names, defaults 'query', 'body' and 'flags', and logs the start of the
 * request with sensitive headers redacted.
 *
 * @param array[] &$reqs Request maps to normalize
 * @throws Exception If a request has no method or no URL
 */
private function normalizeRequests( array &$reqs ) {
	foreach ( $reqs as &$req ) {
		$req['response'] = [
			'code' => 0,
			'reason' => '',
			'headers' => [],
			'body' => '',
			'error' => ''
		];

		// Expand the short form: [ method, url, ... ]
		foreach ( [ 0 => 'method', 1 => 'url' ] as $position => $field ) {
			if ( isset( $req[$position] ) ) {
				$req[$field] = $req[$position];
				unset( $req[$position] );
			}
		}

		if ( !isset( $req['method'] ) ) {
			throw new Exception( "Request has no 'method' field set." );
		}
		if ( !isset( $req['url'] ) ) {
			throw new Exception( "Request has no 'url' field set." );
		}

		if ( $this->localProxy !== false && $this->isLocalURL( $req['url'] ) ) {
			$this->useReverseProxy( $req, $this->localProxy );
		}

		if ( !isset( $req['query'] ) ) {
			$req['query'] = [];
		}

		// Header names are matched case-insensitively from here on
		$lowerCased = [];
		foreach ( $req['headers'] ?? [] as $name => $value ) {
			$lowerCased[strtolower( $name )] = $value;
		}
		$req['headers'] = $lowerCased;

		if ( !isset( $req['body'] ) ) {
			$req['body'] = '';
			$req['headers']['content-length'] = 0;
		}

		// Redact some headers we know to have tokens before logging them
		$loggable = [];
		foreach ( $req['headers'] as $name => $value ) {
			$isSensitive = preg_match( self::SENSITIVE_HEADERS, $name ) === 1;
			$loggable[$name] = $isSensitive ? '[redacted]' : $value;
		}
		$this->logger->debug( "HTTP start: {method} {url}",
			[
				'method' => $req['method'],
				'url' => $req['url'],
				'headers' => $loggable,
			]
		);

		if ( !isset( $req['flags'] ) ) {
			$req['flags'] = [];
		}
	}
}
685 
/**
 * Rewrite a request so that it is sent to a reverse proxy.
 *
 * The original host is preserved in a 'Host' header, the scheme, host and
 * port of the URL are replaced with those of the proxy, and the use of any
 * other proxy is disabled for this request.
 *
 * @param array &$req Request map; 'url', 'headers' and 'proxy' are modified
 * @param string $proxy URL of the reverse proxy
 * @throws Exception If either the proxy URL or the request URL cannot be parsed
 */
private function useReverseProxy( array &$req, $proxy ) {
	$proxyParts = wfParseUrl( $proxy );
	if ( $proxyParts === false ) {
		throw new Exception( "Invalid reverseProxy configured: $proxy" );
	}

	$urlParts = wfParseUrl( $req['url'] );
	if ( $urlParts === false ) {
		throw new Exception( "Invalid url specified: {$req['url']}" );
	}

	// Set the current host in the Host header
	$req['headers']['Host'] = $urlParts['host'];

	// Replace scheme, host and port in the request
	$urlParts['scheme'] = $proxyParts['scheme'];
	$urlParts['host'] = $proxyParts['host'];
	if ( !isset( $proxyParts['port'] ) ) {
		// The proxy URL has no explicit port, so the request URL must not keep its own
		unset( $urlParts['port'] );
	} else {
		$urlParts['port'] = $proxyParts['port'];
	}
	$req['url'] = wfAssembleUrl( $urlParts );

	// Explicitly disable use of another proxy by setting to false,
	// since null will fallback to $this->proxy
	$req['proxy'] = false;
}
710 
/**
 * Check whether the host of a URL is one of the configured local virtual
 * hosts, or a sub-domain of one.
 *
 * Only http:// and https:// URLs whose host is followed by "/" or ":" are
 * recognised; anything else is reported as not local.
 *
 * @param string $url
 * @return bool
 */
private function isLocalURL( $url ) {
	if ( !$this->localVirtualHosts ) {
		// Shortcut
		return false;
	}

	// Extract host part
	$found = [];
	if ( !preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $found ) ) {
		return false;
	}

	// Walk the host labels from right to left, growing the candidate from the
	// top-level domain towards the full host name, and test every suffix.
	$labels = explode( '.', $found[1] );
	$candidate = null;
	for ( $pos = count( $labels ) - 1; $pos >= 0; $pos-- ) {
		$candidate = $candidate === null
			? $labels[$pos]
			: $labels[$pos] . '.' . $candidate;

		if ( in_array( $candidate, $this->localVirtualHosts ) ) {
			return true;
		}
	}

	return false;
}
751 
/**
 * Work out how long curl_multi_select() should wait per iteration.
 *
 * The result is the smaller of the non-zero connect and request timeouts
 * multiplied by TIMEOUT_ACCURACY_FACTOR, but never below 10 microseconds.
 * When neither timeout is set to a truthy value, 1 is returned.
 *
 * @param array $opts Batch options; 'connTimeout' and 'reqTimeout' override
 *   the client defaults when present
 * @return float|int Timeout in seconds
 */
private function getSelectTimeout( $opts ) {
	// array_filter() drops timeouts that are zero or otherwise falsy
	$effective = array_filter( [
		$opts['connTimeout'] ?? $this->connTimeout,
		$opts['reqTimeout'] ?? $this->reqTimeout,
	] );
	if ( !$effective ) {
		return 1;
	}

	$scaled = min( $effective ) * self::TIMEOUT_ACCURACY_FACTOR;

	// Minimum 10us
	return $scaled < 10e-6 ? 10e-6 : $scaled;
}
773 
/**
 * Register a logger.
 *
 * Replaces the logger that receives the per-request debug and warning messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
782 
/**
 * Close the cURL multi handle, if one was ever created.
 */
public function __destruct() {
	if ( !$this->cmh ) {
		return;
	}

	curl_multi_close( $this->cmh );
}
788 }
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfAssembleUrl( $urlParts)
This function will reassemble a URL parsed with wfParseURL.
$matches
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition: WebStart.php:82
Service locator for MediaWiki core services.
Class to handle multiple HTTP requests.
string[] $localVirtualHosts
runMulti(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests.
resource|object $cmh
@phpcs:ignore MediaWiki.Commenting.PropertyDocumentation.ObjectTypeHintVar curl_multi_init() handle
__construct(array $options)
Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get a client object with ap...
string|null $proxy
proxy
string|null $caBundlePath
SSL certificates path.
LoggerInterface $logger
setLogger(LoggerInterface $logger)
Register a logger.
run(array $req, array $opts=[])
Execute an HTTP(S) request.
string|false $localProxy
getCurlHandle(array &$req, array $opts)
isCurlEnabled()
Determines if the curl extension is available.
getCurlMulti(array $opts)
$header