MediaWiki REL1_30
MultiHttpClient.php
Go to the documentation of this file.
1<?php
23use Psr\Log\LoggerAwareInterface;
24use Psr\Log\LoggerInterface;
25use Psr\Log\NullLogger;
26
/**
 * Class to handle concurrent HTTP requests.
 *
 * Requests and responses are plain arrays. A request map may contain:
 *   - method  : HTTP method (required); may also be given as list element 0
 *   - url     : HTTP URL (required); may also be given as list element 1
 *   - query   : map of query parameters appended to the URL (default: none)
 *   - headers : map of header name => value; names are lower-cased
 *   - body    : request body (default: ''); for PUT a string or a readable
 *               stream resource, for POST the value given to CURLOPT_POSTFIELDS
 *   - stream  : resource that receives the response body instead of the
 *               'body' field of the response
 *   - proxy   : proxy for this request, overriding the client default
 *   - flags   : map of flags; 'relayResponseHeaders' passes every received
 *               response header line to header()
 *
 * Each request gets a 'response' map with the fields 'code', 'reason',
 * 'headers' (lower-cased names), 'body' and 'error'. The same values are
 * also available under the integer keys 0-4 (in that order) for list().
 */
class MultiHttpClient implements LoggerAwareInterface {
	/** @var resource|null curl_multi handle; created lazily by getCurlMulti() */
	protected $multiHandle = null;
	/** @var string|null Path to a CA bundle file; peers are verified against it when set */
	protected $caBundlePath;
	/** @var int Default CURLOPT_CONNECTTIMEOUT value */
	protected $connTimeout = 10;
	/** @var int Default CURLOPT_TIMEOUT value */
	protected $reqTimeout = 300;
	/** @var bool Default CURLMOPT_PIPELINING value */
	protected $usePipelining = false;
	/** @var int Default CURLMOPT_MAXCONNECTS value and default batch size in runMulti() */
	protected $maxConnsPerHost = 50;
	/** @var string|null Default CURLOPT_PROXY value */
	protected $proxy;
	/** @var string Default User-Agent header value */
	protected $userAgent = 'wikimedia/multi-http-client v1.0';
	/** @var LoggerInterface */
	protected $logger;

	/**
	 * @param array $options Map of optional settings:
	 *   - caBundlePath    : path to an existing CA bundle file
	 *   - connTimeout     : default connection timeout
	 *   - reqTimeout      : default request timeout
	 *   - usePipelining   : whether to enable pipelining by default
	 *   - maxConnsPerHost : default connection limit / batch size
	 *   - proxy           : default proxy
	 *   - userAgent       : default User-Agent header value
	 *   - logger          : LoggerInterface instance (default: NullLogger)
	 * @throws Exception If 'caBundlePath' is given but the file does not exist
	 */
	public function __construct( array $options ) {
		if ( isset( $options['caBundlePath'] ) ) {
			$this->caBundlePath = $options['caBundlePath'];
			if ( !file_exists( $this->caBundlePath ) ) {
				throw new Exception( "Cannot find CA bundle: " . $this->caBundlePath );
			}
		}
		// Options that map 1:1 onto a property of the same name
		static $opts = [
			'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost',
			'proxy', 'userAgent', 'logger'
		];
		foreach ( $opts as $key ) {
			if ( isset( $options[$key] ) ) {
				$this->$key = $options[$key];
			}
		}
		if ( $this->logger === null ) {
			$this->logger = new NullLogger;
		}
	}

	/**
	 * Execute an HTTP(S) request.
	 *
	 * This is a thin wrapper around runMulti() for a single request.
	 *
	 * @param array $req HTTP request map (see the class documentation)
	 * @param array $opts Per-call options (see runMulti())
	 * @return array Response map of the request
	 * @throws Exception
	 */
	public function run( array $req, array $opts = [] ) {
		return $this->runMulti( [ $req ], $opts )[0]['response'];
	}

	/**
	 * Execute a set of HTTP(S) requests concurrently.
	 *
	 * All requests are normalized first, then executed in batches, and finally
	 * each one is given its 'response' map. The keys of $reqs are preserved.
	 *
	 * @param array $reqs Map of HTTP request maps (see the class documentation)
	 * @param array $opts Per-call overrides:
	 *   - connTimeout     : connection timeout for every request of this call
	 *   - reqTimeout      : request timeout for every request of this call
	 *   - usePipelining   : CURLMOPT_PIPELINING value for this call
	 *   - maxConnsPerHost : CURLMOPT_MAXCONNECTS value and batch size for this call
	 * @return array $reqs with a 'response' field added to every request
	 * @throws Exception If a request is malformed
	 */
	public function runMulti( array $reqs, array $opts = [] ) {
		$chm = $this->getCurlMulti();

		// Normalize $reqs and add all of the required cURL handles...
		$handles = [];
		$forbidReuse = count( $reqs ) > 1;
		foreach ( $reqs as $index => &$req ) {
			$req['response'] = [
				'code'     => 0,
				'reason'   => '',
				'headers'  => [],
				'body'     => '',
				'error'    => ''
			];
			if ( isset( $req[0] ) ) {
				$req['method'] = $req[0]; // short-form
				unset( $req[0] );
			}
			if ( isset( $req[1] ) ) {
				$req['url'] = $req[1]; // short-form
				unset( $req[1] );
			}
			if ( !isset( $req['method'] ) ) {
				throw new Exception( "Request has no 'method' field set." );
			} elseif ( !isset( $req['url'] ) ) {
				throw new Exception( "Request has no 'url' field set." );
			}
			$this->logger->debug( "{$req['method']}: {$req['url']}" );
			$req['query'] = isset( $req['query'] ) ? $req['query'] : [];
			$headers = []; // normalized headers
			if ( isset( $req['headers'] ) ) {
				foreach ( $req['headers'] as $name => $value ) {
					$headers[strtolower( $name )] = $value;
				}
			}
			$req['headers'] = $headers;
			if ( !isset( $req['body'] ) ) {
				$req['body'] = '';
				$req['headers']['content-length'] = 0;
			}
			$req['flags'] = isset( $req['flags'] ) ? $req['flags'] : [];
			$handles[$index] = $this->getCurlHandle( $req, $opts );
			if ( $forbidReuse ) {
				// https://github.com/guzzle/guzzle/issues/349
				curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true );
			}
		}
		unset( $req ); // don't assign over this by accident

		$indexes = array_keys( $reqs );
		if ( isset( $opts['usePipelining'] ) ) {
			curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] );
		}
		$batchSize = $this->maxConnsPerHost;
		if ( isset( $opts['maxConnsPerHost'] ) ) {
			// Keep these sockets around as they may be needed later in the request
			curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] );
			// The per-call override also has to drive the batching below
			$batchSize = $opts['maxConnsPerHost'];
		}

		// @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS)
		// array_chunk() rejects sizes below 1, so clamp the batch size
		$batches = array_chunk( $indexes, max( 1, (int)$batchSize ) );
		$infos = [];

		foreach ( $batches as $batch ) {
			// Attach all cURL handles for this batch
			foreach ( $batch as $index ) {
				curl_multi_add_handle( $chm, $handles[$index] );
			}
			// Execute the cURL handles concurrently...
			$active = null; // handles still being processed
			do {
				// Do any available work...
				do {
					$mrc = curl_multi_exec( $chm, $active );
					// Several transfers can finish during one curl_multi_exec() call and
					// each queues its own message; read them all or the later ones are
					// lost and their requests get reported as "no status set".
					while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
						$infos[(int)$info['handle']] = $info;
					}
				} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
				// Wait (if possible) for available work...
				if ( $active > 0 && $mrc == CURLM_OK ) {
					if ( curl_multi_select( $chm, 10 ) == -1 ) {
						// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
						usleep( 5000 ); // 5ms
					}
				}
			} while ( $active > 0 && $mrc == CURLM_OK );
		}

		// Remove all of the added cURL handles and check for errors...
		foreach ( $reqs as $index => &$req ) {
			$ch = $handles[$index];
			curl_multi_remove_handle( $chm, $ch );

			if ( isset( $infos[(int)$ch] ) ) {
				$info = $infos[(int)$ch];
				$errno = $info['result'];
				if ( $errno !== 0 ) {
					$req['response']['error'] = "(curl error: $errno)";
					if ( function_exists( 'curl_strerror' ) ) {
						$req['response']['error'] .= " " . curl_strerror( $errno );
					}
					$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
						$req['response']['error'] );
				}
			} else {
				$req['response']['error'] = "(curl error: no status set)";
			}

			// For convenience with the list() operator
			$req['response'][0] = $req['response']['code'];
			$req['response'][1] = $req['response']['reason'];
			$req['response'][2] = $req['response']['headers'];
			$req['response'][3] = $req['response']['body'];
			$req['response'][4] = $req['response']['error'];
			curl_close( $ch );
			// Close any string wrapper file handles
			if ( isset( $req['_closeHandle'] ) ) {
				fclose( $req['_closeHandle'] );
				unset( $req['_closeHandle'] );
			}
		}
		unset( $req ); // don't assign over this by accident

		// Restore the default settings
		curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining );
		curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );

		return $reqs;
	}

	/**
	 * Build and configure the cURL handle for one normalized request.
	 *
	 * The callbacks installed here write the status line, headers and body
	 * into $req['response'], which is why $req is taken by reference. For a
	 * PUT with a non-empty string body a php://temp stream is opened and
	 * stored in $req['_closeHandle'] so that the caller can close it.
	 *
	 * @param array &$req HTTP request map, already normalized by runMulti()
	 * @param array $opts Per-call options; 'connTimeout' and 'reqTimeout' are used
	 * @return resource cURL handle
	 * @throws Exception If the request is malformed
	 */
	protected function getCurlHandle( array &$req, array $opts = [] ) {
		$ch = curl_init();

		curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT,
			isset( $opts['connTimeout'] ) ? $opts['connTimeout'] : $this->connTimeout );
		curl_setopt( $ch, CURLOPT_PROXY, isset( $req['proxy'] ) ? $req['proxy'] : $this->proxy );
		curl_setopt( $ch, CURLOPT_TIMEOUT,
			isset( $opts['reqTimeout'] ) ? $opts['reqTimeout'] : $this->reqTimeout );
		curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
		curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
		curl_setopt( $ch, CURLOPT_HEADER, 0 );
		if ( !is_null( $this->caBundlePath ) ) {
			curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
			curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
		}
		curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query != '' ) {
			$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
		}
		curl_setopt( $ch, CURLOPT_URL, $url );

		curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
		if ( $req['method'] === 'HEAD' ) {
			curl_setopt( $ch, CURLOPT_NOBODY, 1 );
		}

		if ( $req['method'] === 'PUT' ) {
			curl_setopt( $ch, CURLOPT_PUT, 1 );
			if ( is_resource( $req['body'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
				if ( isset( $req['headers']['content-length'] ) ) {
					curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
				} elseif ( isset( $req['headers']['transfer-encoding'] ) &&
					// The HTTP token is "chunked"; the historical misspelling "chunks"
					// is still accepted so that existing callers keep working.
					in_array(
						strtolower( trim( $req['headers']['transfer-encoding'] ) ),
						[ 'chunked', 'chunks' ],
						true
					)
				) {
					curl_setopt( $ch, CURLOPT_UPLOAD, true );
				} else {
					throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
				}
			} elseif ( $req['body'] !== '' ) {
				// Wrap the string in a stream, since PUT bodies are read from CURLOPT_INFILE
				$fp = fopen( "php://temp", "wb+" );
				fwrite( $fp, $req['body'], strlen( $req['body'] ) );
				rewind( $fp );
				curl_setopt( $ch, CURLOPT_INFILE, $fp );
				curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
				$req['_closeHandle'] = $fp; // remember to close this later
			} else {
				curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
			}
			curl_setopt( $ch, CURLOPT_READFUNCTION,
				function ( $ch, $fd, $length ) {
					// fread() returns false on failure and no stream is set for an
					// empty body; always hand cURL a string.
					return is_resource( $fd ) ? (string)fread( $fd, $length ) : '';
				}
			);
		} elseif ( $req['method'] === 'POST' ) {
			curl_setopt( $ch, CURLOPT_POST, 1 );
			// Don't interpret POST parameters starting with '@' as file uploads, because this
			// makes it impossible to POST plain values starting with '@' (and causes security
			// issues potentially exposing the contents of local files).
			// The PHP manual says this option was introduced in PHP 5.5 defaults to true in PHP 5.6,
			// but we support lower versions, and the option doesn't exist in HHVM 5.6.99.
			if ( defined( 'CURLOPT_SAFE_UPLOAD' ) ) {
				curl_setopt( $ch, CURLOPT_SAFE_UPLOAD, true );
			} elseif ( is_array( $req['body'] ) ) {
				// In PHP 5.2 and later, '@' is interpreted as a file upload if POSTFIELDS
				// is an array, but not if it's a string. So convert $req['body'] to a string
				// for safety.
				$req['body'] = http_build_query( $req['body'] );
			}
			curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
		} else {
			if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
				throw new Exception( "HTTP body specified for a non PUT/POST request." );
			}
			$req['headers']['content-length'] = 0;
		}

		if ( !isset( $req['headers']['user-agent'] ) ) {
			$req['headers']['user-agent'] = $this->userAgent;
		}

		$headers = [];
		foreach ( $req['headers'] as $name => $value ) {
			// Compare against false explicitly: a match at offset 0 is falsy, and a
			// ':' without a following space is just as invalid in a header name.
			if ( strpos( (string)$name, ':' ) !== false ) {
				throw new Exception( "Headers cannot have ':' in the name." );
			}
			$headers[] = $name . ': ' . trim( $value );
		}
		curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

		curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
			function ( $ch, $header ) use ( &$req ) {
				if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
					header( $header );
				}
				$length = strlen( $header );
				$matches = [];
				// HTTP/2 and HTTP/3 status lines carry no minor version ("HTTP/2 200 ")
				if ( preg_match( '/^(HTTP\/(?:1\.[01]|2|3)) (\d{3}) (.*)/', $header, $matches ) ) {
					$req['response']['code'] = (int)$matches[2];
					$req['response']['reason'] = trim( $matches[3] );
					// A status line starts a new response. When redirects are followed,
					// the headers collected so far belong to an intermediate response,
					// so start over.
					$req['response']['headers'] = [];
					return $length;
				}
				if ( strpos( $header, ":" ) === false ) {
					return $length;
				}
				list( $name, $value ) = explode( ":", $header, 2 );
				$req['response']['headers'][strtolower( $name )] = trim( $value );
				return $length;
			}
		);

		if ( isset( $req['stream'] ) ) {
			// Don't just use CURLOPT_FILE as that might give:
			// curl_setopt(): cannot represent a stream of type Output as a STDIO FILE*
			// The callback here handles both normal files and php://temp handles.
			curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
				function ( $ch, $data ) use ( &$req ) {
					return fwrite( $req['stream'], $data );
				}
			);
		} else {
			curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
				function ( $ch, $data ) use ( &$req ) {
					$req['response']['body'] .= $data;
					return strlen( $data );
				}
			);
		}

		return $ch;
	}

	/**
	 * Get the shared curl_multi handle, creating it on first use with the
	 * default pipelining and connection settings of this client.
	 *
	 * @return resource curl_multi handle
	 */
	protected function getCurlMulti() {
		if ( !$this->multiHandle ) {
			$cmh = curl_multi_init();
			curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining );
			curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
			$this->multiHandle = $cmh;
		}
		return $this->multiHandle;
	}

	/**
	 * Register a logger.
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Close the curl_multi handle, if one was ever created.
	 */
	public function __destruct() {
		if ( $this->multiHandle ) {
			curl_multi_close( $this->multiHandle );
		}
	}
}
Class to handle concurrent HTTP requests.
runMulti(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests concurrently.
__construct(array $options)
string|null $proxy
proxy
string|null $caBundlePath
SSL certificates path
LoggerInterface $logger
setLogger(LoggerInterface $logger)
Register a logger.
run(array $req, array $opts=[])
Execute an HTTP(S) request.
getCurlHandle(array &$req, array $opts=[])
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
this hook is for auditing only $req
Definition hooks.txt:988
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:1971
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition hooks.txt:1610
$batch
Definition linkcache.txt:23
$header