MediaWiki REL1_35
MultiHttpClient.php
Go to the documentation of this file.
1<?php
24use Psr\Log\LoggerAwareInterface;
25use Psr\Log\LoggerInterface;
26use Psr\Log\NullLogger;
27
55class MultiHttpClient implements LoggerAwareInterface {
/** @var resource|null curl_multi_init() handle; created on first use by getCurlMulti() and closed in __destruct() */
57 protected $cmh;
/** @var string|null SSL certificates path; when set it is passed to cURL as CURLOPT_CAINFO */
59 protected $caBundlePath;
/** @var float|int Default connection timeout, in seconds (converted to milliseconds for cURL) */
61 protected $connTimeout = 10;
/** @var float|int Upper bound applied by runMulti() to any requested 'connTimeout' */
63 protected $maxConnTimeout = INF;
/** @var float|int Default total request timeout, in seconds (converted to milliseconds for cURL) */
65 protected $reqTimeout = 30;
/** @var float|int Upper bound applied by runMulti() to any requested 'reqTimeout' */
67 protected $maxReqTimeout = INF;
/** @var bool Default for the 'usePipelining' option read by getCurlMulti() */
69 protected $usePipelining = false;
/** @var int Used for CURLMOPT_MAXCONNECTS and as the default for CURLMOPT_MAX_HOST_CONNECTIONS */
71 protected $maxConnsPerHost = 50;
/** @var string|null Proxy used when a request does not carry its own 'proxy' field */
73 protected $proxy;
/** @var string User-Agent sent when a request has no 'user-agent' header of its own */
75 protected $userAgent = 'wikimedia/multi-http-client v1.0';
/** @var LoggerInterface Defaults to a NullLogger when none is supplied to the constructor */
77 protected $logger;
78
79 // In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect
80 // timeouts are periodically polled instead of being accurately respected.
81 // The select timeout is set to the minimum timeout multiplied by this factor.
82 private const TIMEOUT_ACCURACY_FACTOR = 0.1;
83
/**
 * Create a client and apply any recognised settings from $options.
 *
 * Recognised keys: caBundlePath, connTimeout, maxConnTimeout, reqTimeout,
 * maxReqTimeout, usePipelining, maxConnsPerHost, proxy, userAgent, logger.
 * Keys that are absent or null leave the corresponding default untouched.
 *
 * @param array $options Map of setting name to value
 * @throws Exception If 'caBundlePath' is given but no such file exists
 */
public function __construct( array $options ) {
	$bundle = $options['caBundlePath'] ?? null;
	if ( $bundle !== null ) {
		$this->caBundlePath = $bundle;
		if ( !file_exists( $bundle ) ) {
			throw new Exception( "Cannot find CA bundle: " . $bundle );
		}
	}

	// Each of these option names maps 1:1 onto a property of the same name
	$settable = [
		'connTimeout', 'maxConnTimeout', 'reqTimeout', 'maxReqTimeout',
		'usePipelining', 'maxConnsPerHost', 'proxy', 'userAgent', 'logger'
	];
	foreach ( $settable as $name ) {
		$value = $options[$name] ?? null;
		if ( $value !== null ) {
			$this->$name = $value;
		}
	}

	// Guarantee that $this->logger is always usable
	if ( !isset( $this->logger ) ) {
		$this->logger = new NullLogger;
	}
}
122
/**
 * Execute an HTTP(S) request.
 *
 * Convenience wrapper around runMulti() for a single request.
 *
 * @param array $req Request map in the form accepted by runMulti()
 * @param array $opts Options in the form accepted by runMulti()
 * @return array The 'response' map of the request: code, reason, headers, body
 *   and error, plus the same values under the integer keys 0-4
 */
public function run( array $req, array $opts = [] ) {
	$results = $this->runMulti( [ $req ], $opts );

	return $results[0]['response'];
}
147
/**
 * Execute a set of HTTP(S) requests.
 *
 * Requests are normalized first, the timeouts are defaulted from this object
 * and clamped to the configured maximums, and the batch is then handed to the
 * cURL backend when available or to the sequential fallback otherwise.
 *
 * @param array[] $reqs List/map of request maps; each needs at least a method
 *   and a URL (see normalizeRequests() for the accepted short form)
 * @param array $opts Optional 'connTimeout' and 'reqTimeout' in seconds; other
 *   keys are passed through to the backend
 * @return array[] $reqs with a 'response' map filled in for every request
 * @throws Exception
 */
public function runMulti( array $reqs, array $opts = [] ) {
	$this->normalizeRequests( $reqs );

	// Caller-supplied values win; the object defaults only fill the gaps
	$opts += [ 'connTimeout' => $this->connTimeout, 'reqTimeout' => $this->reqTimeout ];

	// Clamp each timeout to its configured ceiling
	$ceilings = [
		'connTimeout' => $this->maxConnTimeout,
		'reqTimeout' => $this->maxReqTimeout,
	];
	foreach ( $ceilings as $key => $limit ) {
		if ( $opts[$key] > $limit ) {
			$opts[$key] = $limit;
		}
	}

	return $this->isCurlEnabled()
		? $this->runMultiCurl( $reqs, $opts )
		: $this->runMultiHttp( $reqs, $opts );
}
194
/**
 * Determines if the curl extension is available.
 *
 * @return bool True only if the extension is loaded and curl_multi_init() exists
 */
protected function isCurlEnabled() {
	if ( !extension_loaded( 'curl' ) ) {
		return false;
	}

	// Explicitly test if curl_multi* is blocked, as some users' hosts provide
	// them with a modified curl with the multi-threaded parts removed(!)
	return function_exists( 'curl_multi_init' );
}
205
/**
 * Execute a set of HTTP(S) requests concurrently.
 *
 * One cURL easy handle is created per request and attached to the shared multi
 * handle. Once all transfers have run, each request's 'response' map is
 * completed with any cURL error and with the list()-friendly integer keys.
 *
 * @param array[] $reqs Normalized request maps (see normalizeRequests())
 * @param array $opts Must contain 'connTimeout' and 'reqTimeout'; may contain
 *   'usePipelining' and 'maxConnsPerHost'
 * @return array[] $reqs with the 'response' map of every request filled in
 * @throws Exception If a request cannot be turned into a cURL handle
 */
private function runMultiCurl( array $reqs, array $opts ) {
	$chm = $this->getCurlMulti( $opts );

	$selectTimeout = $this->getSelectTimeout( $opts );

	// Add all of the required cURL handles...
	$handles = [];
	try {
		foreach ( $reqs as $index => &$req ) {
			$handles[$index] = $this->getCurlHandle( $req, $opts );
			curl_multi_add_handle( $chm, $handles[$index] );
		}
	} catch ( Throwable $e ) {
		// The multi handle is shared between batches. Detach whatever was already
		// queued so that it cannot be executed as part of a later, unrelated batch.
		foreach ( $handles as $ch ) {
			curl_multi_remove_handle( $chm, $ch );
			curl_close( $ch );
		}
		throw $e;
	} finally {
		unset( $req ); // don't assign over this by accident
	}

	$infos = [];
	// Execute the cURL handles concurrently...
	$active = null; // handles still being processed
	do {
		// Do any available work...
		do {
			$mrc = curl_multi_exec( $chm, $active );
			// Several transfers can complete during a single curl_multi_exec() call, and
			// each one queues its own message. Drain the whole queue; reading only one
			// message per pass would leave finished requests without a recorded status.
			while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
				$infos[(int)$info['handle']] = $info;
			}
		} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
		// Wait (if possible) for available work...
		if ( $active > 0 && $mrc == CURLM_OK && curl_multi_select( $chm, $selectTimeout ) == -1 ) {
			// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
			usleep( 5000 ); // 5ms
		}
	} while ( $active > 0 && $mrc == CURLM_OK );

	// Remove all of the added cURL handles and check for errors...
	foreach ( $reqs as $index => &$req ) {
		$ch = $handles[$index];
		curl_multi_remove_handle( $chm, $ch );

		if ( isset( $infos[(int)$ch] ) ) {
			$errno = $infos[(int)$ch]['result'];
			if ( $errno !== 0 ) {
				$req['response']['error'] = "(curl error: $errno)";
				if ( function_exists( 'curl_strerror' ) ) {
					$req['response']['error'] .= " " . curl_strerror( $errno );
				}
				$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
					$req['response']['error'] );
			}
		} else {
			// No completion message was ever delivered for this handle
			$req['response']['error'] = "(curl error: no status set)";
		}

		// For convenience with the list() operator
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
		curl_close( $ch );
		// Close any string wrapper file handles
		if ( isset( $req['_closeHandle'] ) ) {
			fclose( $req['_closeHandle'] );
			unset( $req['_closeHandle'] );
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
293
/**
 * Build a fully configured cURL easy handle for one normalized request.
 *
 * The request is taken by reference because the header and body callbacks
 * registered here write into $req['response'] while the transfer runs, and
 * because a temporary stream opened for a string PUT body is stored in
 * $req['_closeHandle'] for the caller to close afterwards.
 *
 * @param array &$req Normalized request map (see normalizeRequests())
 * @param array $opts Must contain 'connTimeout' and 'reqTimeout', in seconds
 * @return resource cURL handle as returned by curl_init()
 * @throws Exception On a PUT stream without a usable length/encoding header, a
 *   body on a request that is neither PUT nor POST, or a header name with ':'
 */
protected function getCurlHandle( array &$req, array $opts ) {
	$ch = curl_init();

	curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy );
	// Timeouts are configured in seconds but handed to cURL in milliseconds
	curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, intval( $opts['connTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_TIMEOUT_MS, intval( $opts['reqTimeout'] * 1e3 ) );
	curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
	curl_setopt( $ch, CURLOPT_HEADER, 0 );
	if ( $this->caBundlePath !== null ) {
		curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
		curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
	}
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

	// Append the 'query' map to the URL, respecting an existing query string
	$url = $req['url'];
	$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
	if ( $query != '' ) {
		$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
	}
	curl_setopt( $ch, CURLOPT_URL, $url );
	curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
	curl_setopt( $ch, CURLOPT_NOBODY, ( $req['method'] === 'HEAD' ) );

	if ( $req['method'] === 'PUT' ) {
		curl_setopt( $ch, CURLOPT_PUT, 1 );
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) ) {
			curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
			if ( isset( $req['headers']['content-length'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
			} elseif ( isset( $req['headers']['transfer-encoding'] ) &&
				// 'chunked' is the actual HTTP token; the misspelt 'chunks' is still
				// accepted so that existing callers keep working.
				in_array( $req['headers']['transfer-encoding'], [ 'chunked', 'chunks' ], true )
			) {
				curl_setopt( $ch, CURLOPT_UPLOAD, true );
			} else {
				throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
			}
		} elseif ( $req['body'] !== '' ) {
			// Wrap the string body in a temporary stream so cURL can read it like a file
			$fp = fopen( "php://temp", "wb+" );
			fwrite( $fp, $req['body'], strlen( $req['body'] ) );
			rewind( $fp );
			curl_setopt( $ch, CURLOPT_INFILE, $fp );
			curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
			$req['_closeHandle'] = $fp; // remember to close this later
		} else {
			curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
		}
		curl_setopt( $ch, CURLOPT_READFUNCTION,
			function ( $ch, $fd, $length ) {
				return (string)fread( $fd, $length );
			}
		);
	} elseif ( $req['method'] === 'POST' ) {
		curl_setopt( $ch, CURLOPT_POST, 1 );
		curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
	} else {
		// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
		if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
			throw new Exception( "HTTP body specified for a non PUT/POST request." );
		}
		$req['headers']['content-length'] = 0;
	}

	if ( !isset( $req['headers']['user-agent'] ) ) {
		$req['headers']['user-agent'] = $this->userAgent;
	}

	$headers = [];
	foreach ( $req['headers'] as $name => $value ) {
		// Reject any colon in the name, including one at offset 0 or one that is not
		// followed by a space; such a name would be split at the wrong place when
		// the "name: value" line is parsed.
		if ( strpos( $name, ':' ) !== false ) {
			throw new Exception( "Headers cannot have ':' in the name." );
		}
		$headers[] = $name . ': ' . trim( $value );
	}
	curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

	curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
		function ( $ch, $header ) use ( &$req ) {
			if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) {
				header( $header );
			}
			// cURL expects the callback to return the number of bytes it consumed
			$length = strlen( $header );
			$matches = [];
			if ( preg_match( "/^(HTTP\/(?:1\.[01]|2)) (\d{3}) (.*)/", $header, $matches ) ) {
				$req['response']['code'] = (int)$matches[2];
				$req['response']['reason'] = trim( $matches[3] );
				// After a redirect we will receive this again, but we already stored headers
				// that belonged to a redirect response. Start over.
				$req['response']['headers'] = [];
				return $length;
			}
			if ( strpos( $header, ":" ) === false ) {
				return $length;
			}
			list( $name, $value ) = explode( ":", $header, 2 );
			$name = strtolower( $name );
			$value = trim( $value );
			// Repeated headers are folded into one comma-separated value
			if ( isset( $req['response']['headers'][$name] ) ) {
				$req['response']['headers'][$name] .= ', ' . $value;
			} else {
				$req['response']['headers'][$name] = $value;
			}
			return $length;
		}
	);

	// This works with both file and php://temp handles (unlike CURLOPT_FILE)
	$hasOutputStream = isset( $req['stream'] );
	curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
		function ( $ch, $data ) use ( &$req, $hasOutputStream ) {
			if ( $hasOutputStream ) {
				return fwrite( $req['stream'], $data );
			} else {
				// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$req['response']['body'] .= $data;

				return strlen( $data );
			}
		}
	);

	return $ch;
}
429
/**
 * Get the shared cURL multi handle, creating it on first use, and apply the
 * per-batch connection limit and pipelining settings to it.
 *
 * @param array $opts May contain 'maxConnsPerHost' and 'usePipelining'; the
 *   object-level settings are used for whichever is missing
 * @return resource Handle as returned by curl_multi_init()
 * @throws Exception If curl_multi_init() does not exist
 */
protected function getCurlMulti( array $opts ) {
	if ( !$this->cmh ) {
		if ( !function_exists( 'curl_multi_init' ) ) {
			throw new Exception( "PHP cURL function curl_multi_init missing. " .
				"Check https://www.mediawiki.org/wiki/Manual:CURL" );
		}
		$multi = curl_multi_init();
		// Limit the size of the idle connection cache such that consecutive parallel
		// request batches to the same host can avoid having to keep making connections
		curl_multi_setopt( $multi, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
		$this->cmh = $multi;
	}

	$libVersion = curl_version()['version'];

	// CURLMOPT_MAX_HOST_CONNECTIONS is available since PHP 7.0.7 and cURL 7.30.0
	if ( version_compare( $libVersion, '7.30.0', '>=' ) ) {
		// Limit the number of in-flight requests for any given host
		$hostLimit = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;
		curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$hostLimit );
	}

	$wantPipelining = $opts['usePipelining'] ?? $this->usePipelining;
	if ( !$wantPipelining ) {
		return $this->cmh;
	}

	// The meaning of CURLMOPT_PIPELINING depends on the libcurl version
	if ( version_compare( $libVersion, '7.62', '>=' ) ) {
		// The option is a bitfield but HTTP/1.x pipelining has been removed
		$mode = CURLPIPE_MULTIPLEX;
	} elseif ( version_compare( $libVersion, '7.43', '>=' ) ) {
		// The option is a bitfield and HTTP/1.x pipelining is supported
		$mode = CURLPIPE_HTTP1 | CURLPIPE_MULTIPLEX;
	} else {
		// The option is a boolean
		$mode = 1;
	}
	// Suppress deprecation, we know already (T264735)
	// phpcs:ignore Generic.PHP.NoSilencedErrors
	@curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, $mode );

	return $this->cmh;
}
475
/**
 * Execute a set of HTTP(S) requests sequentially.
 *
 * Fallback used when cURL is unavailable: every request is sent one after the
 * other through the HttpRequestFactory service. Of the request headers only
 * 'user-agent' is forwarded here.
 *
 * @param array[] $reqs Normalized request maps (see normalizeRequests())
 * @param array $opts May contain 'reqTimeout' and 'connTimeout'; the object
 *   defaults are used for whichever is missing
 * @return array[] $reqs with the 'response' map of every request filled in
 */
private function runMultiHttp( array $reqs, array $opts = [] ) {
	// Settings shared by every request of the batch
	$sharedOptions = [
		'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
		'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout,
		'logger' => $this->logger,
		'caInfo' => $this->caBundlePath,
	];
	$joinValues = function ( $v ) {
		return implode( ', ', $v );
	};

	foreach ( $reqs as &$req ) {
		$reqOptions = $sharedOptions + [
			'method' => $req['method'],
			'proxy' => $req['proxy'] ?? $this->proxy,
			'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent,
			'postData' => $req['body'],
		];

		// Append the 'query' map to the URL, respecting an existing query string
		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query != '' ) {
			$separator = strpos( $req['url'], '?' ) === false ? '?' : '&';
			$url .= $separator . $query;
		}

		$factory = MediaWikiServices::getInstance()->getHttpRequestFactory();
		$httpRequest = $factory->create( $url, $reqOptions, __METHOD__ );
		$sv = $httpRequest->execute()->getStatusValue();

		// Each header arrives as a list of values; flatten it to a single string
		$respHeaders = array_map( $joinValues, $httpRequest->getResponseHeaders() );

		$req['response'] = [
			'code' => $httpRequest->getStatus(),
			'reason' => '',
			'headers' => $respHeaders,
			'body' => $httpRequest->getContent(),
			'error' => '',
		];

		$svErrors = $sv->isOK() ? [] : $sv->getErrors();
		if ( isset( $svErrors[0] ) ) {
			$firstError = $svErrors[0];
			$req['response']['error'] = $firstError['message'];

			// param values vary per failure type (ex. unknown host vs unknown page)
			$leadParam = $firstError['params'][0] ?? null;
			if ( $leadParam !== null ) {
				if ( !is_numeric( $leadParam ) ) {
					$req['response']['reason'] = $leadParam;
				} elseif ( isset( $firstError['params'][1] ) ) {
					// @phan-suppress-next-line PhanTypeInvalidDimOffset
					$req['response']['reason'] = $firstError['params'][1];
				}
			}
		}

		// For convenience with the list() operator
		foreach ( [ 'code', 'reason', 'headers', 'body', 'error' ] as $position => $field ) {
			$req['response'][$position] = $req['response'][$field];
		}
	}
	unset( $req ); // don't assign over this by accident

	return $reqs;
}
560
/**
 * Normalize request information.
 *
 * For every request this resets the 'response' map, expands the short form
 * ([0] => method, [1] => URL), lower-cases the header names and fills in
 * defaults for 'query', 'headers', 'body' and 'flags'.
 *
 * @param array[] &$reqs Request maps, modified in place
 * @throws Exception If a request has no method or no URL
 */
private function normalizeRequests( array &$reqs ) {
	// Positional short-form keys and the named fields they stand for
	$shortForm = [ 0 => 'method', 1 => 'url' ];

	foreach ( $reqs as &$req ) {
		$req['response'] = [
			'code' => 0,
			'reason' => '',
			'headers' => [],
			'body' => '',
			'error' => ''
		];

		foreach ( $shortForm as $position => $field ) {
			if ( isset( $req[$position] ) ) {
				$req[$field] = $req[$position];
				unset( $req[$position] );
			}
		}

		if ( !isset( $req['method'] ) ) {
			throw new Exception( "Request has no 'method' field set." );
		}
		if ( !isset( $req['url'] ) ) {
			throw new Exception( "Request has no 'url' field set." );
		}
		$this->logger->debug( "{$req['method']}: {$req['url']}" );

		$req['query'] = $req['query'] ?? [];

		// Header names are compared case-insensitively, so store them lower-cased
		$lowered = [];
		foreach ( $req['headers'] ?? [] as $name => $value ) {
			$lowered[strtolower( $name )] = $value;
		}
		$req['headers'] = $lowered;

		if ( !isset( $req['body'] ) ) {
			$req['body'] = '';
			$req['headers']['content-length'] = 0;
		}

		$req['flags'] = $req['flags'] ?? [];
	}
	unset( $req ); // don't assign over this by accident
}
604
/**
 * Get a suitable select timeout for the given options.
 *
 * The result is the smaller of the non-zero connect/request timeouts scaled by
 * TIMEOUT_ACCURACY_FACTOR, with a floor of 10 microseconds.
 *
 * @param array $opts May contain 'connTimeout' and 'reqTimeout'; the object
 *   defaults are used for whichever is missing
 * @return float|int Timeout in seconds; 1 when both timeouts are zero/empty
 */
private function getSelectTimeout( $opts ) {
	// array_filter() drops zero/empty timeouts so they cannot win the min() below
	$candidates = array_filter( [
		$opts['connTimeout'] ?? $this->connTimeout,
		$opts['reqTimeout'] ?? $this->reqTimeout,
	] );
	if ( !$candidates ) {
		return 1;
	}

	$scaled = min( $candidates ) * self::TIMEOUT_ACCURACY_FACTOR;

	// Minimum 10us for sanity
	return max( $scaled, 10e-6 );
}
626
/**
 * Register a logger.
 *
 * Replaces the logger that receives this client's debug and warning messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
635
/**
 * Close the cURL multi handle, if one was ever created.
 */
public function __destruct() {
	if ( !$this->cmh ) {
		// getCurlMulti() was never called, so there is nothing to release
		return;
	}

	curl_multi_close( $this->cmh );
}
641}
MediaWikiServices is the service locator for the application scope of MediaWiki.
Class to handle multiple HTTP requests.
runMultiHttp(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests sequentially.
runMulti(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests.
getSelectTimeout( $opts)
Get a suitable select timeout for the given options.
normalizeRequests(array &$reqs)
Normalize request information.
__construct(array $options)
Since 1.35, callers should use HttpRequestFactory::createMultiClient() to get a client object with ap...
string|null $proxy
proxy
string|null $caBundlePath
SSL certificates path.
LoggerInterface $logger
setLogger(LoggerInterface $logger)
Register a logger.
run(array $req, array $opts=[])
Execute an HTTP(S) request.
resource $cmh
curl_multi_init() handle
getCurlHandle(array &$req, array $opts)
isCurlEnabled()
Determines if the curl extension is available.
getCurlMulti(array $opts)
runMultiCurl(array $reqs, array $opts)
Execute a set of HTTP(S) requests concurrently.
$header