MediaWiki REL1_37
MultiHttpClient.php
Go to the documentation of this file.
1<?php
24use Psr\Log\LoggerAwareInterface;
25use Psr\Log\LoggerInterface;
26use Psr\Log\NullLogger;
27
55class MultiHttpClient implements LoggerAwareInterface {
57 protected $cmh;
59 protected $caBundlePath;
61 protected $connTimeout = 10;
63 protected $maxConnTimeout = INF;
65 protected $reqTimeout = 30;
67 protected $maxReqTimeout = INF;
69 protected $usePipelining = false;
71 protected $maxConnsPerHost = 50;
73 protected $proxy;
75 protected $userAgent = 'wikimedia/multi-http-client v1.0';
77 protected $logger;
78
79 // In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect
80 // timeouts are periodically polled instead of being accurately respected.
81 // The select timeout is set to the minimum timeout multiplied by this factor.
82 private const TIMEOUT_ACCURACY_FACTOR = 0.1;
83
/**
 * Build a client from an associative array of options.
 *
 * Recognised keys are 'caBundlePath', 'connTimeout', 'maxConnTimeout',
 * 'reqTimeout', 'maxReqTimeout', 'usePipelining', 'maxConnsPerHost', 'proxy',
 * 'userAgent' and 'logger'. A key that is absent (or null) leaves the
 * corresponding property at its declared default.
 *
 * @param array $options
 * @throws Exception If 'caBundlePath' is given but the file does not exist
 */
public function __construct( array $options ) {
	if ( isset( $options['caBundlePath'] ) ) {
		$bundle = $options['caBundlePath'];
		$this->caBundlePath = $bundle;
		if ( !file_exists( $bundle ) ) {
			throw new Exception( "Cannot find CA bundle: " . $bundle );
		}
	}

	// Option names map one-to-one onto the protected properties of the same name
	$configurable = [
		'connTimeout', 'maxConnTimeout', 'reqTimeout', 'maxReqTimeout',
		'usePipelining', 'maxConnsPerHost', 'proxy', 'userAgent', 'logger'
	];
	foreach ( $configurable as $name ) {
		if ( !isset( $options[$name] ) ) {
			continue;
		}
		$this->$name = $options[$name];
	}

	// Without an injected logger, log calls go to a no-op implementation
	$this->logger = $this->logger ?? new NullLogger;
}
122
/**
 * Execute an HTTP(S) request.
 *
 * Thin wrapper that sends the request through runMulti() as a batch of one
 * and hands back only that request's 'response' entry.
 *
 * @param array $req Request map, in the same form runMulti() accepts
 * @param array $opts Options forwarded unchanged to runMulti()
 * @return array The 'response' map of the single request
 */
public function run( array $req, array $opts = [] ) {
	$batch = $this->runMulti( [ $req ], $opts );

	return $batch[0]['response'];
}
147
/**
 * Execute a set of HTTP(S) requests.
 *
 * The requests are normalized in place, the 'connTimeout' and 'reqTimeout'
 * options are defaulted from the instance and capped at the configured maxima,
 * and the batch is then dispatched to the cURL implementation when available,
 * or to the sequential fallback otherwise.
 *
 * @param array[] $reqs Request maps
 * @param array $opts May contain 'connTimeout' and 'reqTimeout'; passed on to the backend
 * @return array[] The request maps, each with its 'response' entry filled in
 * @throws Exception
 */
public function runMulti( array $reqs, array $opts = [] ) {
	$this->normalizeRequests( $reqs );

	$opts += [ 'connTimeout' => $this->connTimeout, 'reqTimeout' => $this->reqTimeout ];

	// Clamp each timeout to its configured ceiling
	$ceilings = [
		'connTimeout' => $this->maxConnTimeout,
		'reqTimeout' => $this->maxReqTimeout,
	];
	foreach ( $ceilings as $key => $ceiling ) {
		if ( $opts[$key] > $ceiling ) {
			$opts[$key] = $ceiling;
		}
	}

	return $this->isCurlEnabled()
		? $this->runMultiCurl( $reqs, $opts )
		: $this->runMultiHttp( $reqs, $opts );
}
194
/**
 * Determines if the curl extension is available.
 *
 * @return bool True only when the extension is loaded and curl_multi_init() exists
 */
protected function isCurlEnabled() {
	if ( !extension_loaded( 'curl' ) ) {
		return false;
	}

	// Some hosts ship a modified curl with the multi-handle functions removed(!),
	// so the presence of the extension alone is not sufficient.
	return function_exists( 'curl_multi_init' );
}
205
/**
 * Execute a set of HTTP(S) requests concurrently.
 *
 * One cURL easy handle is created per request and attached to the shared multi
 * handle. The multi handle is driven until no transfer is active (or it reports
 * an error), after which every handle is detached, its outcome is recorded in
 * the request's 'response' map, and the handle is closed.
 *
 * @param array[] $reqs Normalized request maps (see normalizeRequests())
 * @param array $opts Options; 'connTimeout' and 'reqTimeout' are expected to be set
 * @return array[] $reqs with 'response' filled in (both named and 0..4 numeric keys)
 * @throws Exception Propagated from getCurlHandle()
 */
private function runMultiCurl( array $reqs, array $opts ) {
	$chm = $this->getCurlMulti( $opts );

	$selectTimeout = $this->getSelectTimeout( $opts );

	// Add all of the required cURL handles...
	$handles = [];
	foreach ( $reqs as $index => &$req ) {
		// $req is passed by reference: the handle's callbacks write into it
		$handles[$index] = $this->getCurlHandle( $req, $opts );
		curl_multi_add_handle( $chm, $handles[$index] );
	}
	unset( $req ); // don't assign over this by accident

	$infos = [];
	// Execute the cURL handles concurrently...
	$active = null; // handles still being processed
	do {
		// Do any available work...
		do {
			$mrc = curl_multi_exec( $chm, $active );
		} while ( $mrc == CURLM_CALL_MULTI_PERFORM );

		// Drain the whole completion queue. Several transfers can finish during a
		// single curl_multi_exec() pass; reading just one message per pass would
		// leave the others unread once the loop ends and misreport them as
		// "no status set".
		while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
			$infos[(int)$info['handle']] = $info;
		}

		// Wait (if possible) for available work...
		if ( $active > 0 && $mrc == CURLM_OK && curl_multi_select( $chm, $selectTimeout ) == -1 ) {
			// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
			usleep( 5000 ); // 5ms
		}
	} while ( $active > 0 && $mrc == CURLM_OK );

	// Remove all of the added cURL handles and check for errors...
	foreach ( $reqs as $index => &$req ) {
		$ch = $handles[$index];
		curl_multi_remove_handle( $chm, $ch );

		if ( isset( $infos[(int)$ch] ) ) {
			$info = $infos[(int)$ch];
			$errno = $info['result'];
			if ( $errno !== 0 ) {
				$req['response']['error'] = "(curl error: $errno)";
				if ( function_exists( 'curl_strerror' ) ) {
					$req['response']['error'] .= " " . curl_strerror( $errno );
				}
				$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
					$req['response']['error'] );
			} else {
				$this->logger->debug(
					"HTTP complete: {method} {url} code={response_code} size={size} " .
					"total={total_time} connect={connect_time}",
					[
						'method' => $req['method'],
						'url' => $req['url'],
						'response_code' => $req['response']['code'],
						'size' => curl_getinfo( $ch, CURLINFO_SIZE_DOWNLOAD ),
						'total_time' => $this->getCurlTime(
							$ch, CURLINFO_TOTAL_TIME, 'CURLINFO_TOTAL_TIME_T'
						),
						'connect_time' => $this->getCurlTime(
							$ch, CURLINFO_CONNECT_TIME, 'CURLINFO_CONNECT_TIME_T'
						),
					]
				);
			}
		} else {
			// No completion message was ever delivered for this handle, e.g. because
			// the multi handle stopped with an error before the transfer finished.
			$req['response']['error'] = "(curl error: no status set)";
			$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
				$req['response']['error'] );
		}

		// For convenience with the list() operator
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
		curl_close( $ch );
		// Close any string wrapper file handles
		if ( isset( $req['_closeHandle'] ) ) {
			fclose( $req['_closeHandle'] );
			unset( $req['_closeHandle'] );
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
308
/**
 * Create a cURL easy handle configured for one normalized request.
 *
 * The header and write callbacks installed on the handle fill in
 * $req['response'] while the transfer runs, which is why $req is taken by
 * reference. A string body sent with PUT is spooled into a php://temp stream
 * that is stored in $req['_closeHandle'] so the caller can close it afterwards.
 *
 * @param array &$req Normalized request map (see normalizeRequests()); may gain
 *  'content-length' / 'user-agent' headers and a '_closeHandle' entry
 * @param array $opts Must provide 'connTimeout' and 'reqTimeout' in seconds
 * @return resource cURL easy handle
 * @throws Exception If a streamed PUT body lacks both a Content-Length and a chunked
 *  Transfer-Encoding header, if a body is given for a method other than PUT/POST,
 *  or if a header name contains a colon
 */
protected function getCurlHandle( array &$req, array $opts ) {
	$ch = curl_init();

	curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy );
	// Timeouts are configured in seconds; cURL takes milliseconds here
	curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, intval( $opts['connTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_TIMEOUT_MS, intval( $opts['reqTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
	curl_setopt( $ch, CURLOPT_HEADER, 0 );
	if ( $this->caBundlePath !== null ) {
		curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
		curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
	}
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

	// Append the 'query' parameters to whatever query string the URL already has
	$url = $req['url'];
	$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
	if ( $query != '' ) {
		$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
	}
	curl_setopt( $ch, CURLOPT_URL, $url );
	curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
	curl_setopt( $ch, CURLOPT_NOBODY, ( $req['method'] === 'HEAD' ) );

	if ( $req['method'] === 'PUT' ) {
		curl_setopt( $ch, CURLOPT_PUT, 1 );
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) ) {
			curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
			if ( isset( $req['headers']['content-length'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
			} elseif ( isset( $req['headers']['transfer-encoding'] ) &&
				// 'chunked' is the standard token; the historical 'chunks'
				// spelling is still accepted for existing callers.
				in_array(
					strtolower( (string)$req['headers']['transfer-encoding'] ),
					[ 'chunked', 'chunks' ],
					true
				)
			) {
				curl_setopt( $ch, CURLOPT_UPLOAD, true );
			} else {
				throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
			}
		} elseif ( $req['body'] !== '' ) {
			// cURL uploads from a stream, so wrap the string body in one
			$fp = fopen( "php://temp", "wb+" );
			fwrite( $fp, $req['body'], strlen( $req['body'] ) );
			rewind( $fp );
			curl_setopt( $ch, CURLOPT_INFILE, $fp );
			curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
			$req['_closeHandle'] = $fp; // remember to close this later
		} else {
			curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
		}
		curl_setopt( $ch, CURLOPT_READFUNCTION,
			static function ( $ch, $fd, $length ) {
				// fread() may return false; cURL needs a string
				return (string)fread( $fd, $length );
			}
		);
	} elseif ( $req['method'] === 'POST' ) {
		curl_setopt( $ch, CURLOPT_POST, 1 );
		curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
	} else {
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
			throw new Exception( "HTTP body specified for a non PUT/POST request." );
		}
		$req['headers']['content-length'] = 0;
	}

	if ( !isset( $req['headers']['user-agent'] ) ) {
		$req['headers']['user-agent'] = $this->userAgent;
	}

	$headers = [];
	foreach ( $req['headers'] as $name => $value ) {
		// Compare against false explicitly: strpos() yields 0 (falsy) for a
		// leading colon, and any colon at all would corrupt the header line.
		if ( strpos( (string)$name, ':' ) !== false ) {
			throw new Exception( "Headers cannot have ':' in the name." );
		}
		$headers[] = $name . ': ' . trim( $value );
	}
	curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

	curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
		static function ( $ch, $header ) use ( &$req ) {
			if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) {
				header( $header );
			}
			// cURL expects the callback to return the number of bytes it handled
			$length = strlen( $header );
			$matches = [];
			if ( preg_match( "/^(HTTP\/(?:1\.[01]|2)) (\d{3}) (.*)/", $header, $matches ) ) {
				$req['response']['code'] = (int)$matches[2];
				$req['response']['reason'] = trim( $matches[3] );
				// After a redirect we will receive this again, but we already stored headers
				// that belonged to a redirect response. Start over.
				$req['response']['headers'] = [];
				return $length;
			}
			if ( strpos( $header, ":" ) === false ) {
				// Not a "name: value" line (e.g. the blank line ending the headers)
				return $length;
			}
			[ $name, $value ] = explode( ":", $header, 2 );
			$name = strtolower( $name );
			$value = trim( $value );
			if ( isset( $req['response']['headers'][$name] ) ) {
				// Repeated headers are folded into one comma-separated value
				$req['response']['headers'][$name] .= ', ' . $value;
			} else {
				$req['response']['headers'][$name] = $value;
			}
			return $length;
		}
	);

	// This works with both file and php://temp handles (unlike CURLOPT_FILE)
	$hasOutputStream = isset( $req['stream'] );
	curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
		static function ( $ch, $data ) use ( &$req, $hasOutputStream ) {
			if ( $hasOutputStream ) {
				return fwrite( $req['stream'], $data );
			} else {
				// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$req['response']['body'] .= $data;

				return strlen( $data );
			}
		}
	);

	return $ch;
}
443
/**
 * Return the shared cURL multi handle, creating it on first use, and apply
 * the per-call connection options to it.
 *
 * @param array $opts May contain 'maxConnsPerHost' and 'usePipelining'; each falls
 *  back to the instance setting when absent
 * @return resource The curl_multi_init() handle held in $this->cmh
 */
protected function getCurlMulti( array $opts ) {
	if ( !$this->cmh ) {
		$created = curl_multi_init();
		// Limit the size of the idle connection cache such that consecutive parallel
		// request batches to the same host can avoid having to keep making connections
		curl_multi_setopt( $created, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
		$this->cmh = $created;
	}

	$libVersion = curl_version()['version'];

	// CURLMOPT_MAX_HOST_CONNECTIONS is available since PHP 7.0.7 and cURL 7.30.0
	if ( version_compare( $libVersion, '7.30.0', '>=' ) ) {
		// Limit the number of in-flight requests for any given host
		$perHostLimit = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;
		curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$perHostLimit );
	}

	$wantsPipelining = $opts['usePipelining'] ?? $this->usePipelining;
	if ( !$wantsPipelining ) {
		return $this->cmh;
	}

	if ( version_compare( $libVersion, '7.62', '>=' ) ) {
		// The option is a bitfield but HTTP/1.x pipelining has been removed
		$mode = CURLPIPE_MULTIPLEX;
	} elseif ( version_compare( $libVersion, '7.43', '>=' ) ) {
		// The option is a bitfield and HTTP/1.x pipelining is supported
		$mode = CURLPIPE_HTTP1 | CURLPIPE_MULTIPLEX;
	} else {
		// The option is a boolean
		$mode = 1;
	}
	// Suppress deprecation, we know already (T264735)
	// phpcs:ignore Generic.PHP.NoSilencedErrors
	@curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, $mode );

	return $this->cmh;
}
485
/**
 * Get a timing value from a cURL handle as a string of seconds.
 *
 * When the constant named by $newConstName is defined, its value is read and
 * divided by 1e6, then formatted with six decimals. Otherwise the value of
 * $oldOption is returned as reported by curl_getinfo().
 *
 * @param resource $ch cURL easy handle
 * @param int $oldOption CURLINFO_* option used as the fallback
 * @param string $newConstName Name of the preferred CURLINFO_*_T constant
 * @return string
 */
private function getCurlTime( $ch, $oldOption, $newConstName ): string {
	if ( !defined( $newConstName ) ) {
		return (string)curl_getinfo( $ch, $oldOption );
	}

	$microseconds = curl_getinfo( $ch, constant( $newConstName ) );

	return sprintf( "%.6f", $microseconds / 1e6 );
}
502
/**
 * Execute a set of HTTP(S) requests sequentially.
 *
 * Fallback used when the cURL multi interface is unavailable: each request is
 * sent, one after another, through a request object obtained from
 * MediaWikiServices' HttpRequestFactory.
 *
 * @param array[] $reqs Normalized request maps (see normalizeRequests())
 * @param array $opts May contain 'reqTimeout' and 'connTimeout'; instance defaults
 *  are used otherwise
 * @return array[] $reqs with 'response' filled in (both named and 0..4 numeric keys)
 */
private function runMultiHttp( array $reqs, array $opts = [] ) {
	$httpOptions = [
		'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
		'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout,
		'logger' => $this->logger,
		'caInfo' => $this->caBundlePath,
	];
	foreach ( $reqs as &$req ) {
		$reqOptions = $httpOptions + [
			'method' => $req['method'],
			'proxy' => $req['proxy'] ?? $this->proxy,
			'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent,
			'postData' => $req['body'],
		];

		// Append the 'query' parameters to whatever query string the URL already has
		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query != '' ) {
			$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
		}

		$httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
			$url, $reqOptions, __METHOD__ );
		$httpRequest->setLogger( $this->logger );
		// Forward the caller's request headers, as the cURL code path does.
		// The user agent is already supplied through the 'userAgent' option.
		foreach ( $req['headers'] as $name => $value ) {
			if ( $name !== 'user-agent' ) {
				$httpRequest->setHeader( $name, $value );
			}
		}
		$sv = $httpRequest->execute()->getStatusValue();

		// Each header arrives as a list of values; join them into one string
		$respHeaders = array_map(
			static function ( $v ) {
				return implode( ', ', $v );
			},
			$httpRequest->getResponseHeaders() );

		$req['response'] = [
			'code' => $httpRequest->getStatus(),
			'reason' => '',
			'headers' => $respHeaders,
			'body' => $httpRequest->getContent(),
			'error' => '',
		];

		if ( !$sv->isOK() ) {
			$svErrors = $sv->getErrors();
			if ( isset( $svErrors[0] ) ) {
				$req['response']['error'] = $svErrors[0]['message'];

				// param values vary per failure type (ex. unknown host vs unknown page)
				if ( isset( $svErrors[0]['params'][0] ) ) {
					if ( is_numeric( $svErrors[0]['params'][0] ) ) {
						// A numeric first param is skipped; the second is used as the reason
						if ( isset( $svErrors[0]['params'][1] ) ) {
							// @phan-suppress-next-line PhanTypeInvalidDimOffset
							$req['response']['reason'] = $svErrors[0]['params'][1];
						}
					} else {
						$req['response']['reason'] = $svErrors[0]['params'][0];
					}
				}
			}
		}

		// For convenience with the list() operator
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
587
/**
 * Normalize request information.
 *
 * For every request this resets 'response' to an empty result, expands the
 * positional short form (index 0 = method, index 1 = URL), lower-cases header
 * names, and makes sure 'query', 'headers', 'body' and 'flags' are present.
 * A request without a body gets an empty string body and a zero
 * 'content-length' header.
 *
 * @param array[] &$reqs Request maps, modified in place
 * @throws Exception If a request has no 'method' or no 'url'
 */
private function normalizeRequests( array &$reqs ) {
	foreach ( $reqs as &$req ) {
		$req['response'] = [
			'code' => 0,
			'reason' => '',
			'headers' => [],
			'body' => '',
			'error' => ''
		];
		if ( isset( $req[0] ) ) {
			$req['method'] = $req[0]; // short-form
			unset( $req[0] );
		}
		if ( isset( $req[1] ) ) {
			$req['url'] = $req[1]; // short-form
			unset( $req[1] );
		}
		if ( !isset( $req['method'] ) ) {
			throw new Exception( "Request has no 'method' field set." );
		} elseif ( !isset( $req['url'] ) ) {
			throw new Exception( "Request has no 'url' field set." );
		}
		$this->logger->debug( "HTTP start: {method} {url}",
			[
				'method' => $req['method'],
				'url' => $req['url'],
			]
		);
		$req['query'] = $req['query'] ?? [];
		$headers = []; // normalized headers
		if ( isset( $req['headers'] ) ) {
			foreach ( $req['headers'] as $name => $value ) {
				// Later code looks headers up by lower-case name
				$headers[strtolower( $name )] = $value;
			}
		}
		$req['headers'] = $headers;
		if ( !isset( $req['body'] ) ) {
			$req['body'] = '';
			$req['headers']['content-length'] = 0;
		}
		$req['flags'] = $req['flags'] ?? [];
	}
	unset( $req ); // don't leave the last element bound to a dangling reference
}
636
/**
 * Get a suitable select timeout for the given options.
 *
 * The result is the smaller of the non-zero connect/request timeouts scaled by
 * TIMEOUT_ACCURACY_FACTOR, with a floor of 10 microseconds. When neither
 * timeout is set to a truthy value, 1 is returned.
 *
 * @param array $opts May contain 'connTimeout' and 'reqTimeout'; instance defaults
 *  are used otherwise
 * @return float|int Timeout in seconds
 */
private function getSelectTimeout( $opts ) {
	// array_filter() drops falsy entries such as 0 or null
	$candidates = array_filter( [
		$opts['connTimeout'] ?? $this->connTimeout,
		$opts['reqTimeout'] ?? $this->reqTimeout,
	] );
	if ( !$candidates ) {
		return 1;
	}

	$scaled = min( $candidates ) * self::TIMEOUT_ACCURACY_FACTOR;

	// Minimum 10us for sanity
	return $scaled < 10e-6 ? 10e-6 : $scaled;
}
658
/**
 * Register a logger.
 *
 * Replaces the logger this client writes its debug and warning messages to.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
667
/**
 * Close the cURL multi handle, if one was ever created.
 */
public function __destruct() {
	if ( !$this->cmh ) {
		// getCurlMulti() was never called, so there is nothing to release
		return;
	}

	curl_multi_close( $this->cmh );
}
673}
if(ini_get('mbstring.func_overload')) if(!defined('MW_ENTRY_POINT'))
Pre-config setup: Before loading LocalSettings.php.
Definition Setup.php:88
MediaWikiServices is the service locator for the application scope of MediaWiki.
Class to handle multiple HTTP requests.
runMultiHttp(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests sequentially.
runMulti(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests.
getSelectTimeout( $opts)
Get a suitable select timeout for the given options.
normalizeRequests(array &$reqs)
Normalize request information.
__construct(array $options)
Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get a client object with ap...
string null $proxy
proxy
string null $caBundlePath
SSL certificates path.
LoggerInterface $logger
setLogger(LoggerInterface $logger)
Register a logger.
run(array $req, array $opts=[])
Execute an HTTP(S) request.
resource $cmh
curl_multi_init() handle
getCurlHandle(array &$req, array $opts)
isCurlEnabled()
Determines if the curl extension is available.
getCurlTime( $ch, $oldOption, $newConstName)
Get a time in seconds, formatted with microsecond resolution, or fall back to second resolution on PH...
getCurlMulti(array $opts)
runMultiCurl(array $reqs, array $opts)
Execute a set of HTTP(S) requests concurrently.
$header