MediaWiki  1.30.0
MultiHttpClient.php
Go to the documentation of this file.
1 <?php
23 use Psr\Log\LoggerAwareInterface;
24 use Psr\Log\LoggerInterface;
25 use Psr\Log\NullLogger;
26 
/**
 * Class to handle concurrent HTTP requests.
 *
 * Requests are plain maps. The fields read by this class are:
 *   - method  : HTTP method (required; may be given in short form as index 0)
 *   - url     : request URL (required; may be given in short form as index 1)
 *   - query   : map of query parameters appended to the URL (default: [])
 *   - headers : map of header name => value; names are normalized to lower case
 *   - body    : request body. For PUT this is a string or a stream resource;
 *               for POST it is passed to CURLOPT_POSTFIELDS. Any other method
 *               must not have a body.
 *   - stream  : optional resource; when set, the response body is written to it
 *               instead of being buffered in the response map
 *   - flags   : map of flags; 'relayResponseHeaders' makes every response header
 *               line get forwarded through header()
 *   - proxy   : optional per-request proxy overriding the client default
 *
 * After execution each request has a 'response' map with the keys 'code',
 * 'reason', 'headers', 'body' and 'error', which are also available under the
 * numeric indexes 0 to 4 (in that order) for use with list().
 */
class MultiHttpClient implements LoggerAwareInterface {
	/** @var resource|null curl_multi handle; created lazily by getCurlMulti() */
	protected $multiHandle = null;
	/** @var string|null SSL certificates path */
	protected $caBundlePath;
	/** @var int Default value for CURLOPT_CONNECTTIMEOUT */
	protected $connTimeout = 10;
	/** @var int Default value for CURLOPT_TIMEOUT */
	protected $reqTimeout = 300;
	/** @var bool Default value for CURLMOPT_PIPELINING */
	protected $usePipelining = false;
	/** @var int Default value for CURLMOPT_MAXCONNECTS; also the request batch size */
	protected $maxConnsPerHost = 50;
	/** @var string|null Default proxy */
	protected $proxy;
	/** @var string User-Agent sent when a request does not provide one */
	protected $userAgent = 'wikimedia/multi-http-client v1.0';
	/** @var LoggerInterface */
	protected $logger;

	/**
	 * @param array $options Map of optional settings:
	 *   - caBundlePath    : path to a CA bundle file; enables peer verification against it
	 *   - connTimeout     : overrides the default connection timeout
	 *   - reqTimeout      : overrides the default request timeout
	 *   - usePipelining   : overrides the default pipelining setting
	 *   - maxConnsPerHost : overrides the default connection/batch limit
	 *   - proxy           : default proxy for all requests
	 *   - userAgent       : default User-Agent header value
	 *   - logger          : a LoggerInterface; a NullLogger is used when absent
	 * @throws Exception If the given CA bundle file does not exist
	 */
	public function __construct( array $options ) {
		if ( isset( $options['caBundlePath'] ) ) {
			$this->caBundlePath = $options['caBundlePath'];
			if ( !file_exists( $this->caBundlePath ) ) {
				throw new Exception( "Cannot find CA bundle: " . $this->caBundlePath );
			}
		}
		static $opts = [
			'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost',
			'proxy', 'userAgent', 'logger'
		];
		foreach ( $opts as $key ) {
			if ( isset( $options[$key] ) ) {
				$this->$key = $options[$key];
			}
		}
		if ( $this->logger === null ) {
			$this->logger = new NullLogger;
		}
	}

	/**
	 * Execute an HTTP(S) request.
	 *
	 * This is a convenience wrapper around runMulti() for a single request.
	 *
	 * @param array $req Request map (see the class documentation)
	 * @param array $opts Per-call options, passed through to runMulti()
	 * @return array The 'response' map of the request
	 * @throws Exception
	 */
	public function run( array $req, array $opts = [] ) {
		return $this->runMulti( [ $req ], $opts )[0]['response'];
	}

	/**
	 * Execute a set of HTTP(S) requests concurrently.
	 *
	 * The requests are normalized, executed in batches of at most
	 * $this->maxConnsPerHost handles, and returned with a 'response' map
	 * added to each of them. The keys of $reqs are preserved.
	 *
	 * @param array $reqs Map of request maps (see the class documentation)
	 * @param array $opts Per-call overrides:
	 *   - connTimeout     : connection timeout for these requests
	 *   - reqTimeout      : request timeout for these requests
	 *   - usePipelining   : CURLMOPT_PIPELINING value for this call
	 *   - maxConnsPerHost : CURLMOPT_MAXCONNECTS value for this call
	 * @return array $reqs with normalized fields and a 'response' map per request
	 * @throws Exception If a request lacks 'method' or 'url', or is otherwise malformed
	 */
	public function runMulti( array $reqs, array $opts = [] ) {
		$chm = $this->getCurlMulti();

		// Normalize $reqs and add all of the required cURL handles...
		$handles = [];
		foreach ( $reqs as $index => &$req ) {
			$req['response'] = [
				'code' => 0,
				'reason' => '',
				'headers' => [],
				'body' => '',
				'error' => ''
			];
			if ( isset( $req[0] ) ) {
				$req['method'] = $req[0]; // short-form
				unset( $req[0] );
			}
			if ( isset( $req[1] ) ) {
				$req['url'] = $req[1]; // short-form
				unset( $req[1] );
			}
			if ( !isset( $req['method'] ) ) {
				throw new Exception( "Request has no 'method' field set." );
			} elseif ( !isset( $req['url'] ) ) {
				throw new Exception( "Request has no 'url' field set." );
			}
			$this->logger->debug( "{$req['method']}: {$req['url']}" );
			$req['query'] = isset( $req['query'] ) ? $req['query'] : [];
			$headers = []; // normalized headers
			if ( isset( $req['headers'] ) ) {
				foreach ( $req['headers'] as $name => $value ) {
					$headers[strtolower( $name )] = $value;
				}
			}
			$req['headers'] = $headers;
			if ( !isset( $req['body'] ) ) {
				$req['body'] = '';
				$req['headers']['content-length'] = 0;
			}
			$req['flags'] = isset( $req['flags'] ) ? $req['flags'] : [];
			$handles[$index] = $this->getCurlHandle( $req, $opts );
			if ( count( $reqs ) > 1 ) {
				// https://github.com/guzzle/guzzle/issues/349
				curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true );
			}
		}
		unset( $req ); // don't assign over this by accident

		$indexes = array_keys( $reqs );
		if ( isset( $opts['usePipelining'] ) ) {
			curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] );
		}
		if ( isset( $opts['maxConnsPerHost'] ) ) {
			// Keep these sockets around as they may be needed later in the request
			curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] );
		}

		// @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS)
		$batches = array_chunk( $indexes, $this->maxConnsPerHost );
		$infos = [];

		foreach ( $batches as $batch ) {
			// Attach all cURL handles for this batch
			foreach ( $batch as $index ) {
				curl_multi_add_handle( $chm, $handles[$index] );
			}
			// Execute the cURL handles concurrently...
			$active = null; // handles still being processed
			do {
				// Do any available work...
				do {
					$mrc = curl_multi_exec( $chm, $active );
					// Drain the whole message queue. Several transfers can finish
					// during a single curl_multi_exec() call, and reading only one
					// message per call would leave the others without a status.
					while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
						$infos[(int)$info['handle']] = $info;
					}
				} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
				// Wait (if possible) for available work...
				if ( $active > 0 && $mrc == CURLM_OK ) {
					if ( curl_multi_select( $chm, 10 ) == -1 ) {
						// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
						usleep( 5000 ); // 5ms
					}
				}
			} while ( $active > 0 && $mrc == CURLM_OK );
		}

		// Remove all of the added cURL handles and check for errors...
		foreach ( $reqs as $index => &$req ) {
			$ch = $handles[$index];
			curl_multi_remove_handle( $chm, $ch );

			if ( isset( $infos[(int)$ch] ) ) {
				$info = $infos[(int)$ch];
				$errno = $info['result'];
				if ( $errno !== 0 ) {
					$req['response']['error'] = "(curl error: $errno)";
					if ( function_exists( 'curl_strerror' ) ) {
						$req['response']['error'] .= " " . curl_strerror( $errno );
					}
					$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
						$req['response']['error'] );
				}
			} else {
				$req['response']['error'] = "(curl error: no status set)";
			}

			// For convenience with the list() operator
			$req['response'][0] = $req['response']['code'];
			$req['response'][1] = $req['response']['reason'];
			$req['response'][2] = $req['response']['headers'];
			$req['response'][3] = $req['response']['body'];
			$req['response'][4] = $req['response']['error'];
			curl_close( $ch );
			// Close any string wrapper file handles
			if ( isset( $req['_closeHandle'] ) ) {
				fclose( $req['_closeHandle'] );
				unset( $req['_closeHandle'] );
			}
		}
		unset( $req ); // don't assign over this by accident

		// Restore the default settings
		curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining );
		curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );

		return $reqs;
	}

	/**
	 * Build a configured cURL easy handle for one normalized request.
	 *
	 * The request is taken by reference because the header and body callbacks
	 * registered here write the response into $req['response'] while the
	 * transfer runs, and because a temporary stream created for a string PUT
	 * body is stored in $req['_closeHandle'] so the caller can close it.
	 *
	 * @param array &$req Normalized request map (as prepared by runMulti())
	 * @param array $opts Per-call overrides; 'connTimeout' and 'reqTimeout' are read
	 * @return resource cURL handle
	 * @throws Exception If the request is malformed (stream PUT without a length
	 *   or chunked encoding, body on a non PUT/POST request, ':' in a header name)
	 */
	protected function getCurlHandle( array &$req, array $opts = [] ) {
		$ch = curl_init();

		curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT,
			isset( $opts['connTimeout'] ) ? $opts['connTimeout'] : $this->connTimeout );
		curl_setopt( $ch, CURLOPT_PROXY, isset( $req['proxy'] ) ? $req['proxy'] : $this->proxy );
		curl_setopt( $ch, CURLOPT_TIMEOUT,
			isset( $opts['reqTimeout'] ) ? $opts['reqTimeout'] : $this->reqTimeout );
		curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
		curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
		curl_setopt( $ch, CURLOPT_HEADER, 0 );
		if ( !is_null( $this->caBundlePath ) ) {
			curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
			curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
		}
		curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

		// Append the query parameters, respecting a query string already in the URL
		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query != '' ) {
			$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
		}
		curl_setopt( $ch, CURLOPT_URL, $url );

		curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
		if ( $req['method'] === 'HEAD' ) {
			curl_setopt( $ch, CURLOPT_NOBODY, 1 );
		}

		if ( $req['method'] === 'PUT' ) {
			curl_setopt( $ch, CURLOPT_PUT, 1 );
			if ( is_resource( $req['body'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
				if ( isset( $req['headers']['content-length'] ) ) {
					curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
				} elseif ( isset( $req['headers']['transfer-encoding'] ) &&
					// 'chunked' is the actual HTTP transfer coding name; 'chunks' is
					// still accepted for callers relying on the historical spelling.
					in_array( $req['headers']['transfer-encoding'], [ 'chunked', 'chunks' ], true )
				) {
					curl_setopt( $ch, CURLOPT_UPLOAD, true );
				} else {
					throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
				}
			} elseif ( $req['body'] !== '' ) {
				// Wrap the string body in a temporary stream that cURL can read from
				$fp = fopen( "php://temp", "wb+" );
				fwrite( $fp, $req['body'], strlen( $req['body'] ) );
				rewind( $fp );
				curl_setopt( $ch, CURLOPT_INFILE, $fp );
				curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
				$req['_closeHandle'] = $fp; // remember to close this later
			} else {
				curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
			}
			curl_setopt( $ch, CURLOPT_READFUNCTION,
				function ( $ch, $fd, $length ) {
					return fread( $fd, $length );
				}
			);
		} elseif ( $req['method'] === 'POST' ) {
			curl_setopt( $ch, CURLOPT_POST, 1 );
			// Don't interpret POST parameters starting with '@' as file uploads, because this
			// makes it impossible to POST plain values starting with '@' (and causes security
			// issues potentially exposing the contents of local files).
			// The PHP manual says this option was introduced in PHP 5.5 defaults to true in PHP 5.6,
			// but we support lower versions, and the option doesn't exist in HHVM 5.6.99.
			if ( defined( 'CURLOPT_SAFE_UPLOAD' ) ) {
				curl_setopt( $ch, CURLOPT_SAFE_UPLOAD, true );
			} elseif ( is_array( $req['body'] ) ) {
				// In PHP 5.2 and later, '@' is interpreted as a file upload if POSTFIELDS
				// is an array, but not if it's a string. So convert $req['body'] to a string
				// for safety.
				$req['body'] = http_build_query( $req['body'] );
			}
			curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
		} else {
			if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
				throw new Exception( "HTTP body specified for a non PUT/POST request." );
			}
			$req['headers']['content-length'] = 0;
		}

		if ( !isset( $req['headers']['user-agent'] ) ) {
			$req['headers']['user-agent'] = $this->userAgent;
		}

		$headers = [];
		foreach ( $req['headers'] as $name => $value ) {
			// Compare against false explicitly so a ':' at offset 0 is caught too
			if ( strpos( $name, ':' ) !== false ) {
				throw new Exception( "Headers cannot have ':' in the name." );
			}
			$headers[] = $name . ': ' . trim( $value );
		}
		curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

		curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
			function ( $ch, $header ) use ( &$req ) {
				if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
					header( $header );
				}
				// cURL expects the callback to return the number of bytes handled
				$length = strlen( $header );
				$matches = [];
				// Status line. Accept any HTTP version (e.g. "HTTP/1.1 200 OK" as well as
				// "HTTP/2 200") and tolerate a missing reason phrase.
				if ( preg_match( "/^HTTP\/\d+(?:\.\d+)? (\d{3})(?: (.*))?/", $header, $matches ) ) {
					$req['response']['code'] = (int)$matches[1];
					$req['response']['reason'] = isset( $matches[2] ) ? trim( $matches[2] ) : '';
					return $length;
				}
				if ( strpos( $header, ":" ) === false ) {
					return $length;
				}
				list( $name, $value ) = explode( ":", $header, 2 );
				$req['response']['headers'][strtolower( $name )] = trim( $value );
				return $length;
			}
		);

		if ( isset( $req['stream'] ) ) {
			// Don't just use CURLOPT_FILE as that might give:
			// curl_setopt(): cannot represent a stream of type Output as a STDIO FILE*
			// The callback here handles both normal files and php://temp handles.
			curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
				function ( $ch, $data ) use ( &$req ) {
					return fwrite( $req['stream'], $data );
				}
			);
		} else {
			curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
				function ( $ch, $data ) use ( &$req ) {
					$req['response']['body'] .= $data;
					return strlen( $data );
				}
			);
		}

		return $ch;
	}

	/**
	 * Get the shared curl_multi handle, creating and configuring it on first use.
	 *
	 * @return resource curl_multi handle
	 */
	protected function getCurlMulti() {
		if ( !$this->multiHandle ) {
			$cmh = curl_multi_init();
			curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining );
			curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
			$this->multiHandle = $cmh;
		}
		return $this->multiHandle;
	}

	/**
	 * Register a logger.
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Close the curl_multi handle if one was created.
	 */
	public function __destruct() {
		if ( $this->multiHandle ) {
			curl_multi_close( $this->multiHandle );
		}
	}
}
MultiHttpClient
Class to handle concurrent HTTP requests.
Definition: MultiHttpClient.php:48
MultiHttpClient\$usePipelining
bool $usePipelining
Definition: MultiHttpClient.php:58
MultiHttpClient\$maxConnsPerHost
int $maxConnsPerHost
Definition: MultiHttpClient.php:60
captcha-old.count
count
Definition: captcha-old.py:249
MultiHttpClient\$connTimeout
int $connTimeout
Definition: MultiHttpClient.php:54
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
$req
this hook is for auditing only $req
Definition: hooks.txt:988
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
MultiHttpClient\run
run(array $req, array $opts=[])
Execute an HTTP(S) request.
Definition: MultiHttpClient.php:118
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
MultiHttpClient\__destruct
__destruct()
Definition: MultiHttpClient.php:444
$query
null for the wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1581
MultiHttpClient\getCurlHandle
getCurlHandle(array &$req, array $opts=[])
Definition: MultiHttpClient.php:285
$matches
$matches
Definition: NoLocalSettings.php:24
MultiHttpClient\$caBundlePath
string null $caBundlePath
SSL certificates path
Definition: MultiHttpClient.php:52
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
MultiHttpClient\setLogger
setLogger(LoggerInterface $logger)
Register a logger.
Definition: MultiHttpClient.php:440
$value
$value
Definition: styleTest.css.php:45
$header
$header
Definition: updateCredits.php:35
MultiHttpClient\$userAgent
string $userAgent
Definition: MultiHttpClient.php:64
MultiHttpClient\$multiHandle
resource $multiHandle
Definition: MultiHttpClient.php:50
MultiHttpClient\runMulti
runMulti(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests concurrently.
Definition: MultiHttpClient.php:148
MultiHttpClient\$proxy
string null $proxy
proxy
Definition: MultiHttpClient.php:62
MultiHttpClient\getCurlMulti
getCurlMulti()
Definition: MultiHttpClient.php:425
MultiHttpClient\__construct
__construct(array $options)
Definition: MultiHttpClient.php:78
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1965
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MultiHttpClient\$reqTimeout
int $reqTimeout
Definition: MultiHttpClient.php:56
$batch
$batch
Definition: linkcache.txt:23
array
the array() calling protocol came about after MediaWiki 1.4rc1.
MultiHttpClient\$logger
LoggerInterface $logger
Definition: MultiHttpClient.php:66