MediaWiki  1.29.1
MultiHttpClient.php
Go to the documentation of this file.
1 <?php
/** @var resource|null curl_multi handle; created on first use by getCurlMulti(), closed in __destruct() */
47  protected $multiHandle = null; // curl_multi handle
/** @var string|null SSL certificates path; when set, peer verification is enabled against this CA bundle */
49  protected $caBundlePath;
/** @var int Default value passed to CURLOPT_CONNECTTIMEOUT for each request */
51  protected $connTimeout = 10;
/** @var int Default value passed to CURLOPT_TIMEOUT for each request */
53  protected $reqTimeout = 300;
/** @var bool Default value (cast to int) for CURLMOPT_PIPELINING on the multi handle */
55  protected $usePipelining = false;
/** @var int Default CURLMOPT_MAXCONNECTS value; also the size of each batch of handles in runMulti() */
57  protected $maxConnsPerHost = 50;
/** @var string|null Default CURLOPT_PROXY value, used when a request has no 'proxy' field */
59  protected $proxy;
/** @var string Value of the User-Agent header sent when a request does not supply its own */
61  protected $userAgent = 'wikimedia/multi-http-client v1.0';
62 
/**
 * Set up the client from a map of options.
 *
 * @param array $options Recognised keys:
 *   - caBundlePath    : path to a CA bundle file; the file must exist
 *   - connTimeout     : overrides the default connection timeout
 *   - reqTimeout      : overrides the default request timeout
 *   - usePipelining   : overrides the default pipelining setting
 *   - maxConnsPerHost : overrides the default connection limit
 *   - proxy           : overrides the default proxy
 *   - userAgent       : overrides the default User-Agent string
 * @throws Exception If 'caBundlePath' is given but no such file exists
 */
public function __construct( array $options ) {
	if ( isset( $options['caBundlePath'] ) ) {
		$bundle = $options['caBundlePath'];
		$this->caBundlePath = $bundle;
		if ( !file_exists( $bundle ) ) {
			throw new Exception( "Cannot find CA bundle: " . $bundle );
		}
	}

	// Each of these option names maps directly onto the property of the same name
	$overridable = [
		'connTimeout',
		'reqTimeout',
		'usePipelining',
		'maxConnsPerHost',
		'proxy',
		'userAgent'
	];
	foreach ( $overridable as $property ) {
		if ( !isset( $options[$property] ) ) {
			continue; // keep the built-in default
		}
		$this->$property = $options[$property];
	}
}
89 
/**
 * Execute a single HTTP(S) request.
 *
 * Convenience wrapper that sends the request through runMulti() as a
 * one-element batch and hands back only its 'response' map.
 *
 * @param array $req Request map, in the form accepted by runMulti()
 * @param array $opts Options, passed through to runMulti() unchanged
 * @return array The 'response' entry of the processed request
 */
public function run( array $req, array $opts = [] ) {
	$processed = $this->runMulti( [ $req ], $opts );

	return $processed[0]['response'];
}
112 
/**
 * Execute a set of HTTP(S) requests concurrently.
 *
 * Each request is a map that may contain:
 *   - method  : HTTP method (may also be given as element 0, short form)
 *   - url     : target URL (may also be given as element 1, short form)
 *   - query   : map of query parameters appended to the URL (default: [])
 *   - headers : map of request headers; names are lower-cased here
 *   - body    : request body (default: ''; content-length is then set to 0)
 *   - stream  : handle that receives the response body instead of 'body'
 *   - proxy   : proxy to use for this request
 *   - flags   : map of flags, e.g. 'relayResponseHeaders' (default: [])
 *
 * Every request gets a 'response' map with the keys 'code', 'reason',
 * 'headers', 'body' and 'error', plus the same five values under the
 * integer keys 0-4 for use with list(). 'error' is '' on success,
 * "(curl error: N) ..." when cURL reported a failure, or
 * "(curl error: no status set)" when no completion status was received.
 *
 * @param array $reqs Map of request maps; keys are preserved in the result
 * @param array $opts Per-call overrides:
 *   - connTimeout     : passed on to getCurlHandle()
 *   - reqTimeout      : passed on to getCurlHandle()
 *   - usePipelining   : CURLMOPT_PIPELINING for the duration of this call
 *   - maxConnsPerHost : CURLMOPT_MAXCONNECTS for the duration of this call
 *     (batch sizes are still taken from the instance-level setting)
 * @return array $reqs with the normalized fields and 'response' filled in
 * @throws Exception If a request lacks 'method' or 'url', or getCurlHandle() rejects it
 */
public function runMulti( array $reqs, array $opts = [] ) {
	$chm = $this->getCurlMulti();

	// Loop-invariant: whether more than one request is being sent in this call
	$multipleReqs = count( $reqs ) > 1;

	// Normalize $reqs and add all of the required cURL handles...
	$handles = [];
	foreach ( $reqs as $index => &$req ) {
		$req['response'] = [
			'code' => 0,
			'reason' => '',
			'headers' => [],
			'body' => '',
			'error' => ''
		];
		if ( isset( $req[0] ) ) {
			$req['method'] = $req[0]; // short-form
			unset( $req[0] );
		}
		if ( isset( $req[1] ) ) {
			$req['url'] = $req[1]; // short-form
			unset( $req[1] );
		}
		if ( !isset( $req['method'] ) ) {
			throw new Exception( "Request has no 'method' field set." );
		} elseif ( !isset( $req['url'] ) ) {
			throw new Exception( "Request has no 'url' field set." );
		}
		$req['query'] = isset( $req['query'] ) ? $req['query'] : [];
		$headers = []; // normalized headers
		if ( isset( $req['headers'] ) ) {
			foreach ( $req['headers'] as $name => $value ) {
				$headers[strtolower( $name )] = $value;
			}
		}
		$req['headers'] = $headers;
		if ( !isset( $req['body'] ) ) {
			$req['body'] = '';
			$req['headers']['content-length'] = 0;
		}
		$req['flags'] = isset( $req['flags'] ) ? $req['flags'] : [];
		$handles[$index] = $this->getCurlHandle( $req, $opts );
		if ( $multipleReqs ) {
			// https://github.com/guzzle/guzzle/issues/349
			curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true );
		}
	}
	unset( $req ); // don't assign over this by accident

	$indexes = array_keys( $reqs );
	if ( isset( $opts['usePipelining'] ) ) {
		curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] );
	}
	if ( isset( $opts['maxConnsPerHost'] ) ) {
		// Keep these sockets around as they may be needed later in the request
		curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] );
	}

	// @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS)
	// array_chunk() requires a size of at least 1, so guard against a zero/negative setting
	$batches = array_chunk( $indexes, max( 1, (int)$this->maxConnsPerHost ) );
	$infos = [];

	foreach ( $batches as $batch ) {
		// Attach all cURL handles for this batch
		foreach ( $batch as $index ) {
			curl_multi_add_handle( $chm, $handles[$index] );
		}
		// Execute the cURL handles concurrently...
		$active = null; // handles still being processed
		do {
			// Do any available work...
			do {
				$mrc = curl_multi_exec( $chm, $active );
				// Drain the whole message queue: several transfers can finish within
				// a single curl_multi_exec() call, and reading only one message per
				// call would leave the others without a recorded status.
				while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
					$infos[(int)$info['handle']] = $info;
				}
			} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
			// Wait (if possible) for available work...
			if ( $active > 0 && $mrc == CURLM_OK ) {
				if ( curl_multi_select( $chm, 10 ) == -1 ) {
					// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
					usleep( 5000 ); // 5ms
				}
			}
		} while ( $active > 0 && $mrc == CURLM_OK );
	}

	// Remove all of the added cURL handles and check for errors...
	foreach ( $reqs as $index => &$req ) {
		$ch = $handles[$index];
		curl_multi_remove_handle( $chm, $ch );

		if ( isset( $infos[(int)$ch] ) ) {
			$info = $infos[(int)$ch];
			$errno = $info['result'];
			if ( $errno !== 0 ) {
				$req['response']['error'] = "(curl error: $errno)";
				if ( function_exists( 'curl_strerror' ) ) {
					$req['response']['error'] .= " " . curl_strerror( $errno );
				}
			}
		} else {
			$req['response']['error'] = "(curl error: no status set)";
		}

		// For convenience with the list() operator
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
		curl_close( $ch );
		// Close any string wrapper file handles
		if ( isset( $req['_closeHandle'] ) ) {
			fclose( $req['_closeHandle'] );
			unset( $req['_closeHandle'] );
		}
	}
	unset( $req ); // don't assign over this by accident

	// Restore the default settings
	curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining );
	curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );

	return $reqs;
}
264 
/**
 * Build a configured cURL easy handle for one normalized request.
 *
 * The callbacks installed here write the response status, headers and body
 * into $req['response'] (or into $req['stream'] for the body), which is why
 * $req is taken by reference.
 *
 * @param array &$req Request map with 'method', 'url', 'query', 'headers',
 *   'body' and 'flags' already set. May be modified: 'headers' can gain
 *   'content-length' and 'user-agent', an array POST body can be converted
 *   to a string, and '_closeHandle' is added when a temporary stream is
 *   opened for a string PUT body.
 * @param array $opts Per-call overrides; 'connTimeout' and 'reqTimeout' are read
 * @return resource The cURL handle, not yet executed
 * @throws Exception If a PUT with a stream body has neither a content-length
 *   nor a chunked transfer-encoding header, if a body is given for a method
 *   other than PUT/POST, or if a header name contains ':'
 */
protected function getCurlHandle( array &$req, array $opts = [] ) {
	$ch = curl_init();

	curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT,
		isset( $opts['connTimeout'] ) ? $opts['connTimeout'] : $this->connTimeout );
	curl_setopt( $ch, CURLOPT_PROXY, isset( $req['proxy'] ) ? $req['proxy'] : $this->proxy );
	curl_setopt( $ch, CURLOPT_TIMEOUT,
		isset( $opts['reqTimeout'] ) ? $opts['reqTimeout'] : $this->reqTimeout );
	curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
	curl_setopt( $ch, CURLOPT_HEADER, 0 );
	if ( !is_null( $this->caBundlePath ) ) {
		curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
		curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
	}
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

	// Append the 'query' parameters to whatever query string the URL already has
	$url = $req['url'];
	$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
	if ( $query != '' ) {
		$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
	}
	curl_setopt( $ch, CURLOPT_URL, $url );

	curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
	if ( $req['method'] === 'HEAD' ) {
		curl_setopt( $ch, CURLOPT_NOBODY, 1 );
	}

	if ( $req['method'] === 'PUT' ) {
		curl_setopt( $ch, CURLOPT_PUT, 1 );
		if ( is_resource( $req['body'] ) ) {
			curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
			if ( isset( $req['headers']['content-length'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
			} elseif ( isset( $req['headers']['transfer-encoding'] ) &&
				// The HTTP transfer coding is named "chunked"; the misspelt "chunks"
				// is still accepted so that existing callers keep working.
				in_array(
					strtolower( trim( $req['headers']['transfer-encoding'] ) ),
					[ 'chunked', 'chunks' ],
					true
				)
			) {
				curl_setopt( $ch, CURLOPT_UPLOAD, true );
			} else {
				throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
			}
		} elseif ( $req['body'] !== '' ) {
			// Wrap the string body in a temporary stream so cURL can read it like a file
			$fp = fopen( "php://temp", "wb+" );
			fwrite( $fp, $req['body'], strlen( $req['body'] ) );
			rewind( $fp );
			curl_setopt( $ch, CURLOPT_INFILE, $fp );
			curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
			$req['_closeHandle'] = $fp; // remember to close this later
		} else {
			curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
		}
		curl_setopt( $ch, CURLOPT_READFUNCTION,
			function ( $ch, $fd, $length ) {
				return fread( $fd, $length );
			}
		);
	} elseif ( $req['method'] === 'POST' ) {
		curl_setopt( $ch, CURLOPT_POST, 1 );
		// Don't interpret POST parameters starting with '@' as file uploads, because this
		// makes it impossible to POST plain values starting with '@' (and causes security
		// issues potentially exposing the contents of local files).
		// The PHP manual says this option was introduced in PHP 5.5 and defaults to true
		// in PHP 5.6, but we support lower versions, and the option doesn't exist in
		// HHVM 5.6.99.
		if ( defined( 'CURLOPT_SAFE_UPLOAD' ) ) {
			curl_setopt( $ch, CURLOPT_SAFE_UPLOAD, true );
		} elseif ( is_array( $req['body'] ) ) {
			// In PHP 5.2 and later, '@' is interpreted as a file upload if POSTFIELDS
			// is an array, but not if it's a string. So convert $req['body'] to a string
			// for safety.
			$req['body'] = http_build_query( $req['body'] );
		}
		curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
	} else {
		if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
			throw new Exception( "HTTP body specified for a non PUT/POST request." );
		}
		$req['headers']['content-length'] = 0;
	}

	if ( !isset( $req['headers']['user-agent'] ) ) {
		$req['headers']['user-agent'] = $this->userAgent;
	}

	$headers = [];
	foreach ( $req['headers'] as $name => $value ) {
		// Compare against false explicitly: a ':' at offset 0 yields 0, which is falsy
		if ( strpos( $name, ':' ) !== false ) {
			throw new Exception( "Headers cannot have ':' in the name." );
		}
		$headers[] = $name . ': ' . trim( $value );
	}
	curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

	curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
		function ( $ch, $header ) use ( &$req ) {
			if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
				header( $header );
			}
			// cURL expects the number of bytes handled; anything else aborts the transfer
			$length = strlen( $header );
			$matches = [];
			// Status line. Accept any HTTP version (e.g. "HTTP/2 200") and a missing
			// reason phrase, not just "HTTP/1.x <code> <reason>".
			if ( preg_match( "/^(HTTP\/\d+(?:\.\d+)?) (\d{3})(?: (.*))?/", $header, $matches ) ) {
				$req['response']['code'] = (int)$matches[2];
				$req['response']['reason'] = isset( $matches[3] ) ? trim( $matches[3] ) : '';
				return $length;
			}
			if ( strpos( $header, ":" ) === false ) {
				return $length; // blank separator line or otherwise not a "name: value" pair
			}
			list( $name, $value ) = explode( ":", $header, 2 );
			$req['response']['headers'][strtolower( $name )] = trim( $value );
			return $length;
		}
	);

	if ( isset( $req['stream'] ) ) {
		// Don't just use CURLOPT_FILE as that might give:
		// curl_setopt(): cannot represent a stream of type Output as a STDIO FILE*
		// The callback here handles both normal files and php://temp handles.
		curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
			function ( $ch, $data ) use ( &$req ) {
				return fwrite( $req['stream'], $data );
			}
		);
	} else {
		// No stream given: accumulate the body in the response map
		curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
			function ( $ch, $data ) use ( &$req ) {
				$req['response']['body'] .= $data;
				return strlen( $data );
			}
		);
	}

	return $ch;
}
409 
/**
 * Get the shared curl_multi handle, creating it on first use.
 *
 * A newly created handle is configured with the instance's pipelining and
 * connection-limit settings and then kept for later calls.
 *
 * @return resource curl_multi handle
 */
protected function getCurlMulti() {
	if ( $this->multiHandle ) {
		return $this->multiHandle; // already initialised
	}

	$handle = curl_multi_init();
	curl_multi_setopt( $handle, CURLMOPT_PIPELINING, (int)$this->usePipelining );
	curl_multi_setopt( $handle, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
	$this->multiHandle = $handle;

	return $this->multiHandle;
}
422 
/**
 * Close the curl_multi handle, if one was ever created.
 */
public function __destruct() {
	if ( $this->multiHandle ) {
		curl_multi_close( $this->multiHandle );
	}
}
428 }
MultiHttpClient
Class to handle concurrent HTTP requests.
Definition: MultiHttpClient.php:45
MultiHttpClient\$usePipelining
bool $usePipelining
Definition: MultiHttpClient.php:55
captcha-old.count
count
Definition: captcha-old.py:225
MultiHttpClient\$connTimeout
integer $connTimeout
Definition: MultiHttpClient.php:51
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
$req
this hook is for auditing only $req
Definition: hooks.txt:990
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:304
MultiHttpClient\run
run(array $req, array $opts=[])
Execute an HTTP(S) request.
Definition: MultiHttpClient.php:109
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
MultiHttpClient\__destruct
__destruct()
Definition: MultiHttpClient.php:423
$query
null for the wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1572
MultiHttpClient\getCurlHandle
getCurlHandle(array &$req, array $opts=[])
Definition: MultiHttpClient.php:273
MultiHttpClient\$reqTimeout
integer $reqTimeout
Definition: MultiHttpClient.php:53
$matches
$matches
Definition: NoLocalSettings.php:24
MultiHttpClient\$caBundlePath
string|null $caBundlePath
SSL certificates path
Definition: MultiHttpClient.php:49
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
$value
$value
Definition: styleTest.css.php:45
$header
$header
Definition: updateCredits.php:35
MultiHttpClient\$userAgent
string $userAgent
Definition: MultiHttpClient.php:61
MultiHttpClient\$multiHandle
resource $multiHandle
Definition: MultiHttpClient.php:47
MultiHttpClient\runMulti
runMulti(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests concurrently.
Definition: MultiHttpClient.php:139
MultiHttpClient\$proxy
string|null $proxy
proxy
Definition: MultiHttpClient.php:59
MultiHttpClient\getCurlMulti
getCurlMulti()
Definition: MultiHttpClient.php:413
MultiHttpClient\__construct
__construct(array $options)
Definition: MultiHttpClient.php:73
MultiHttpClient\$maxConnsPerHost
integer $maxConnsPerHost
Definition: MultiHttpClient.php:57
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$batch
$batch
Definition: linkcache.txt:23
$options
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1049
array
the array() calling protocol came about after MediaWiki 1.4rc1.