MediaWiki REL1_31
MultiHttpClient.php
Go to the documentation of this file.
1<?php
23use Psr\Log\LoggerAwareInterface;
24use Psr\Log\LoggerInterface;
25use Psr\Log\NullLogger;
26
/**
 * Class to handle concurrent HTTP requests.
 *
 * HTTP request maps are arrays that use the following fields:
 *   - method  : HTTP method name; 'HEAD', 'PUT' and 'POST' get special handling
 *   - url     : URL to request
 *   - query   : associative array of query parameters, encoded per RFC 3986
 *               and appended to the URL
 *   - headers : associative array of header name => value (names are lower-cased)
 *   - body    : request body; a string, or a readable resource for PUT requests.
 *               Only PUT and POST requests may carry a body.
 *   - stream  : resource to write the response body to instead of buffering it
 *   - proxy   : proxy to use for this request (overrides the client-wide proxy)
 *   - flags   : map of flags; 'relayResponseHeaders' passes each response
 *               header line to header()
 * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'.
 *
 * HTTP response maps are arrays with the fields 'code', 'reason', 'headers',
 * 'body' and 'error'. The same values are also available under the integer
 * indexes 0 through 4 (in that order) for convenient use with list().
 */
class MultiHttpClient implements LoggerAwareInterface {
	/** @var resource|null curl_multi handle; created on first use by getCurlMulti() */
	protected $multiHandle = null;
	/** @var string|null SSL certificates path */
	protected $caBundlePath;
	/** @var int Default value for CURLOPT_CONNECTTIMEOUT */
	protected $connTimeout = 10;
	/** @var int Default value for CURLOPT_TIMEOUT */
	protected $reqTimeout = 300;
	/** @var bool Default value for CURLMOPT_PIPELINING */
	protected $usePipelining = false;
	/** @var int Default value for CURLMOPT_MAXCONNECTS; also the request batch size */
	protected $maxConnsPerHost = 50;
	/** @var string|null Default proxy */
	protected $proxy;
	/** @var string User-Agent header sent when a request does not supply one */
	protected $userAgent = 'wikimedia/multi-http-client v1.0';
	/** @var LoggerInterface */
	protected $logger;

	/**
	 * @param array $options Map of optional settings:
	 *   - caBundlePath    : path to a CA bundle file; when set, peer verification
	 *                       is enabled against this bundle
	 *   - connTimeout     : default connection timeout
	 *   - reqTimeout      : default request timeout
	 *   - usePipelining   : whether to enable cURL multi pipelining by default
	 *   - maxConnsPerHost : default connection cache size / request batch size
	 *   - proxy           : default proxy
	 *   - userAgent       : default User-Agent header value
	 *   - logger          : LoggerInterface instance (NullLogger when omitted)
	 * @throws Exception If 'caBundlePath' is given but the file does not exist
	 */
	public function __construct( array $options ) {
		if ( isset( $options['caBundlePath'] ) ) {
			$this->caBundlePath = $options['caBundlePath'];
			if ( !file_exists( $this->caBundlePath ) ) {
				throw new Exception( "Cannot find CA bundle: " . $this->caBundlePath );
			}
		}
		static $opts = [
			'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost',
			'proxy', 'userAgent', 'logger'
		];
		foreach ( $opts as $key ) {
			if ( isset( $options[$key] ) ) {
				$this->$key = $options[$key];
			}
		}
		if ( $this->logger === null ) {
			$this->logger = new NullLogger;
		}
	}

	/**
	 * Execute an HTTP(S) request.
	 *
	 * This is a thin wrapper around runMulti() for a single request.
	 *
	 * @param array $req HTTP request map (see the class documentation)
	 * @param array $opts Options accepted by runMulti()
	 * @return array HTTP response map (see the class documentation)
	 * @throws Exception
	 */
	public function run( array $req, array $opts = [] ) {
		return $this->runMulti( [ $req ], $opts )[0]['response'];
	}

	/**
	 * Execute a set of HTTP(S) requests concurrently.
	 *
	 * Requests are attached to the shared curl_multi handle in batches of at
	 * most $this->maxConnsPerHost and each batch is run to completion before
	 * the next one starts.
	 *
	 * @param array $reqs Map of HTTP request maps (see the class documentation)
	 * @param array $opts Map of per-call overrides:
	 *   - connTimeout     : connection timeout for these requests
	 *   - reqTimeout      : request timeout for these requests
	 *   - usePipelining   : CURLMOPT_PIPELINING value for this call
	 *   - maxConnsPerHost : CURLMOPT_MAXCONNECTS value for this call
	 * @return array $reqs, keys preserved, with the request maps normalized and
	 *   a 'response' field (HTTP response map) added to each of them
	 * @throws Exception If a request map is missing 'method' or 'url', or is
	 *   rejected by getCurlHandle()
	 */
	public function runMulti( array $reqs, array $opts = [] ) {
		$chm = $this->getCurlMulti();

		// Normalize $reqs and add all of the required cURL handles...
		$handles = [];
		$forbidReuse = count( $reqs ) > 1;
		foreach ( $reqs as $index => &$req ) {
			$req['response'] = [
				'code' => 0,
				'reason' => '',
				'headers' => [],
				'body' => '',
				'error' => ''
			];
			if ( isset( $req[0] ) ) {
				$req['method'] = $req[0]; // short-form
				unset( $req[0] );
			}
			if ( isset( $req[1] ) ) {
				$req['url'] = $req[1]; // short-form
				unset( $req[1] );
			}
			if ( !isset( $req['method'] ) ) {
				throw new Exception( "Request has no 'method' field set." );
			} elseif ( !isset( $req['url'] ) ) {
				throw new Exception( "Request has no 'url' field set." );
			}
			$this->logger->debug( "{$req['method']}: {$req['url']}" );
			$req['query'] = isset( $req['query'] ) ? $req['query'] : [];
			$headers = []; // normalized headers
			if ( isset( $req['headers'] ) ) {
				foreach ( $req['headers'] as $name => $value ) {
					$headers[strtolower( $name )] = $value;
				}
			}
			$req['headers'] = $headers;
			if ( !isset( $req['body'] ) ) {
				$req['body'] = '';
				$req['headers']['content-length'] = 0;
			}
			$req['flags'] = isset( $req['flags'] ) ? $req['flags'] : [];
			// $req is passed by reference so that the cURL callbacks created in
			// getCurlHandle() write straight into this entry of $reqs
			$handles[$index] = $this->getCurlHandle( $req, $opts );
			if ( $forbidReuse ) {
				// https://github.com/guzzle/guzzle/issues/349
				curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true );
			}
		}
		unset( $req ); // don't assign over this by accident

		if ( isset( $opts['usePipelining'] ) ) {
			curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] );
		}
		if ( isset( $opts['maxConnsPerHost'] ) ) {
			// Keep these sockets around as they may be needed later in the request
			curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] );
		}

		// @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS)
		// array_chunk() rejects sizes below 1, so clamp a misconfigured value
		$batches = array_chunk( array_keys( $reqs ), max( 1, (int)$this->maxConnsPerHost ) );
		$infos = []; // completion messages, keyed by request index

		foreach ( $batches as $batch ) {
			// Attach all cURL handles for this batch
			foreach ( $batch as $index ) {
				curl_multi_add_handle( $chm, $handles[$index] );
			}
			// Execute the cURL handles concurrently...
			$active = null; // handles still being processed
			do {
				// Do any available work...
				do {
					$mrc = curl_multi_exec( $chm, $active );
					// Several transfers can finish during a single exec call, so
					// drain the whole message queue rather than reading one entry;
					// anything left queued would be reported as "no status set".
					while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
						if ( $info['msg'] !== CURLMSG_DONE ) {
							continue;
						}
						// Match by identity instead of casting the handle to int,
						// which stops being unique once handles are objects (PHP 8+)
						$doneIndex = array_search( $info['handle'], $handles, true );
						if ( $doneIndex !== false ) {
							$infos[$doneIndex] = $info;
						}
					}
				} while ( $mrc == CURLM_CALL_MULTI_PERFORM );
				// Wait (if possible) for available work...
				if ( $active > 0 && $mrc == CURLM_OK ) {
					if ( curl_multi_select( $chm, 10 ) == -1 ) {
						// PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
						usleep( 5000 ); // 5ms
					}
				}
			} while ( $active > 0 && $mrc == CURLM_OK );
		}

		// Remove all of the added cURL handles and check for errors...
		foreach ( $reqs as $index => &$req ) {
			$ch = $handles[$index];
			curl_multi_remove_handle( $chm, $ch );

			if ( isset( $infos[$index] ) ) {
				$errno = $infos[$index]['result'];
				if ( $errno !== 0 ) {
					$req['response']['error'] = "(curl error: $errno)";
					if ( function_exists( 'curl_strerror' ) ) {
						$req['response']['error'] .= " " . curl_strerror( $errno );
					}
					$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
						$req['response']['error'] );
				}
			} else {
				$req['response']['error'] = "(curl error: no status set)";
			}

			// For convenience with the list() operator
			$req['response'][0] = $req['response']['code'];
			$req['response'][1] = $req['response']['reason'];
			$req['response'][2] = $req['response']['headers'];
			$req['response'][3] = $req['response']['body'];
			$req['response'][4] = $req['response']['error'];
			curl_close( $ch );
			// Close any string wrapper file handles
			if ( isset( $req['_closeHandle'] ) ) {
				fclose( $req['_closeHandle'] );
				unset( $req['_closeHandle'] );
			}
		}
		unset( $req ); // don't assign over this by accident

		// Restore the default settings
		curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining );
		curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );

		return $reqs;
	}

	/**
	 * Build a configured cURL easy handle for one request.
	 *
	 * The header and body callbacks installed here capture $req by reference
	 * and fill in $req['response'] while the transfer runs. For PUT requests
	 * with a non-empty string body, a php://temp stream is opened and stored in
	 * $req['_closeHandle'] so that the caller can close it afterwards.
	 *
	 * @param array &$req HTTP request map, already normalized by runMulti()
	 * @param array $opts Map with optional 'connTimeout' and 'reqTimeout' overrides
	 * @return resource cURL handle, as returned by curl_init()
	 * @throws Exception If a PUT with a resource body has neither a
	 *   'content-length' nor a chunked 'transfer-encoding' header, if a body is
	 *   given for a method other than PUT/POST, or if a header name contains ':'
	 */
	protected function getCurlHandle( array &$req, array $opts = [] ) {
		$ch = curl_init();

		curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT,
			isset( $opts['connTimeout'] ) ? $opts['connTimeout'] : $this->connTimeout );
		curl_setopt( $ch, CURLOPT_PROXY, isset( $req['proxy'] ) ? $req['proxy'] : $this->proxy );
		curl_setopt( $ch, CURLOPT_TIMEOUT,
			isset( $opts['reqTimeout'] ) ? $opts['reqTimeout'] : $this->reqTimeout );
		curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
		curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
		curl_setopt( $ch, CURLOPT_HEADER, 0 );
		if ( !is_null( $this->caBundlePath ) ) {
			curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
			curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
		}
		curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

		// Append the 'query' parameters to whatever query string the URL already has
		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query != '' ) {
			$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
		}
		curl_setopt( $ch, CURLOPT_URL, $url );

		curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
		if ( $req['method'] === 'HEAD' ) {
			curl_setopt( $ch, CURLOPT_NOBODY, 1 );
		}

		if ( $req['method'] === 'PUT' ) {
			curl_setopt( $ch, CURLOPT_PUT, 1 );
			if ( is_resource( $req['body'] ) ) {
				curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
				if ( isset( $req['headers']['content-length'] ) ) {
					curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
				} elseif ( isset( $req['headers']['transfer-encoding'] ) && (
					// "chunked" is the standard token; the historical "chunks"
					// spelling is still accepted for existing callers
					$req['headers']['transfer-encoding'] === 'chunked' ||
					$req['headers']['transfer-encoding'] === 'chunks'
				) ) {
					curl_setopt( $ch, CURLOPT_UPLOAD, true );
				} else {
					throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
				}
			} elseif ( $req['body'] !== '' ) {
				// Wrap the string body in a temporary stream that cURL can read from
				$fp = fopen( "php://temp", "wb+" );
				fwrite( $fp, $req['body'], strlen( $req['body'] ) );
				rewind( $fp );
				curl_setopt( $ch, CURLOPT_INFILE, $fp );
				curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
				$req['_closeHandle'] = $fp; // remember to close this later
			} else {
				curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
			}
			curl_setopt( $ch, CURLOPT_READFUNCTION,
				function ( $ch, $fd, $length ) {
					return fread( $fd, $length );
				}
			);
		} elseif ( $req['method'] === 'POST' ) {
			curl_setopt( $ch, CURLOPT_POST, 1 );
			// Don't interpret POST parameters starting with '@' as file uploads, because this
			// makes it impossible to POST plain values starting with '@' (and causes security
			// issues potentially exposing the contents of local files).
			curl_setopt( $ch, CURLOPT_SAFE_UPLOAD, true );
			curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
		} else {
			if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
				throw new Exception( "HTTP body specified for a non PUT/POST request." );
			}
			$req['headers']['content-length'] = 0;
		}

		if ( !isset( $req['headers']['user-agent'] ) ) {
			$req['headers']['user-agent'] = $this->userAgent;
		}

		$headers = [];
		foreach ( $req['headers'] as $name => $value ) {
			// Compare against false explicitly: a colon at offset 0 is still a colon
			if ( strpos( $name, ':' ) !== false ) {
				throw new Exception( "Headers cannot have ':' in the name." );
			}
			$headers[] = $name . ': ' . trim( $value );
		}
		curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

		curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
			function ( $ch, $header ) use ( &$req ) {
				if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
					header( $header );
				}
				$length = strlen( $header );
				$matches = [];
				// Status line: record the code and reason phrase
				if ( preg_match( "/^(HTTP\/(?:1\.[01]|2)) (\d{3}) (.*)/", $header, $matches ) ) {
					$req['response']['code'] = (int)$matches[2];
					$req['response']['reason'] = trim( $matches[3] );
					return $length;
				}
				// Lines without a colon are not "name: value" headers; skip them
				if ( strpos( $header, ":" ) === false ) {
					return $length;
				}
				list( $name, $value ) = explode( ":", $header, 2 );
				$req['response']['headers'][strtolower( $name )] = trim( $value );
				return $length;
			}
		);

		if ( isset( $req['stream'] ) ) {
			// Don't just use CURLOPT_FILE as that might give:
			// curl_setopt(): cannot represent a stream of type Output as a STDIO FILE*
			// The callback here handles both normal files and php://temp handles.
			curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
				function ( $ch, $data ) use ( &$req ) {
					return fwrite( $req['stream'], $data );
				}
			);
		} else {
			curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
				function ( $ch, $data ) use ( &$req ) {
					$req['response']['body'] .= $data;
					return strlen( $data );
				}
			);
		}

		return $ch;
	}

	/**
	 * Get the shared curl_multi handle, creating and configuring it on first use.
	 *
	 * @return resource curl_multi handle
	 * @throws Exception If the PHP cURL extension is not available
	 */
	protected function getCurlMulti() {
		if ( !$this->multiHandle ) {
			if ( !function_exists( 'curl_multi_init' ) ) {
				throw new Exception( "PHP cURL extension missing. " .
					"Check https://www.mediawiki.org/wiki/Manual:CURL" );
			}
			$cmh = curl_multi_init();
			curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining );
			curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
			$this->multiHandle = $cmh;
		}
		return $this->multiHandle;
	}

	/**
	 * Register a logger.
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Close the curl_multi handle, if one was ever created.
	 */
	public function __destruct() {
		if ( $this->multiHandle ) {
			curl_multi_close( $this->multiHandle );
		}
	}
}
Class to handle concurrent HTTP requests.
runMulti(array $reqs, array $opts=[])
Execute a set of HTTP(S) requests concurrently.
__construct(array $options)
string null $proxy
proxy
string null $caBundlePath
SSL certificates path.
LoggerInterface $logger
setLogger(LoggerInterface $logger)
Register a logger.
run(array $req, array $opts=[])
Execute an HTTP(S) request.
getCurlHandle(array &$req, array $opts=[])
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user, for example updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred update object and putting those objects on a global list.
Definition deferred.txt:11
this hook is for auditing only $req
Definition hooks.txt:990
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2001
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition hooks.txt:1620
$batch
Definition linkcache.txt:23
$header