MediaWiki  master
MWHttpRequest.php
Go to the documentation of this file.
1 <?php
23 use Psr\Log\LoggerAwareInterface;
24 use Psr\Log\LoggerInterface;
25 use Psr\Log\NullLogger;
26 
/**
 * Abstract base for outgoing HTTP requests.
 *
 * This class stores the request options, the request headers, the cookie jar
 * and the parsed response headers. Concrete backends are expected to override
 * execute() to actually perform the transfer.
 */
abstract class MWHttpRequest implements LoggerAwareInterface {
	/** Whether this backend supports posting files; false for the base class. */
	public const SUPPORTS_FILE_POSTS = false;

	/**
	 * Overall timeout. Starts as the placeholder 'default', which the
	 * constructor always replaces with an option or a configured value.
	 * @var int|string
	 */
	protected $timeout = 'default';

	/** Response body; reset by prepare() and appended to by read(). */
	protected $content;
	/** Set to true by prepare() when the request method is HEAD. */
	protected $headersOnly = null;
	/** Request parameters, see setData() and the 'postData' option. */
	protected $postData = null;
	/** Proxy to use; forced to '' when $noProxy is set. */
	protected $proxy = null;
	/** When true, no proxy is used and $proxy is cleared. */
	protected $noProxy = false;
	/** 'sslVerifyHost' option; not read in this class (presumably used by backends). */
	protected $sslVerifyHost = true;
	/** 'sslVerifyCert' option; not read in this class (presumably used by backends). */
	protected $sslVerifyCert = true;
	/** 'caInfo' option; not read in this class (presumably used by backends). */
	protected $caInfo = null;
	/** HTTP method; the constructor upper-cases the 'method' option. */
	protected $method = "GET";
	/** @var array Request headers, keyed by header name as passed to setHeader() */
	protected $reqHeaders = [];
	/** Fully expanded request URL. */
	protected $url;
	/** Result of wfParseUrl() on $url (false when the URL could not be parsed). */
	protected $parsedUrl;
	/** @var callable Callback receiving response data, see setCallback() */
	protected $callback;
	/** 'maxRedirects' option; not read in this class (presumably used by backends). */
	protected $maxRedirects = 5;
	/** 'followRedirects' option; not read in this class (presumably used by backends). */
	protected $followRedirects = false;
	/** Connection timeout, from the 'connectTimeout' option or configuration. */
	protected $connectTimeout;

	/**
	 * @var CookieJar
	 */
	protected $cookieJar;

	/** Raw response header lines, consumed by parseHeader(). */
	protected $headerList = [];
	/** HTTP version taken from the response status line. */
	protected $respVersion = "0.9";
	/** Status code and reason phrase taken from the response status line. */
	protected $respStatus = "200 Ok";
	/** @var string[][] Response headers, keyed by lower-cased header name */
	protected $respHeaders = [];

	/** @var StatusValue */
	protected $status;

	/**
	 * @var Profiler
	 */
	protected $profiler;

	/**
	 * @var string
	 */
	protected $profileName;

	/**
	 * @var LoggerInterface
	 */
	protected $logger;

	/**
	 * @param string $url URL to request; expanded with wfExpandUrl( $url, PROTO_HTTP )
	 * @param array $options Recognised keys: 'logger', 'timeout', 'connectTimeout',
	 *   'userAgent', 'username' and 'password' (sent together as HTTP Basic
	 *   authorization), 'originalRequest', 'postData', 'proxy', 'noProxy',
	 *   'sslVerifyHost', 'sslVerifyCert', 'caInfo', 'method', 'followRedirects',
	 *   'maxRedirects' and 'callback'.
	 * @param string $caller Name stored in $profileName
	 * @param Profiler|null $profiler Profiler stored in $profiler
	 */
	public function __construct(
		$url, array $options = [], $caller = __METHOD__, ?Profiler $profiler = null
	) {
		$this->url = wfExpandUrl( $url, PROTO_HTTP );
		$this->parsedUrl = wfParseUrl( $this->url );

		$this->logger = $options['logger'] ?? new NullLogger();

		if ( !$this->parsedUrl || !self::isValidURI( $this->url ) ) {
			$this->status = StatusValue::newFatal( 'http-invalid-url', $url );
		} else {
			$this->status = StatusValue::newGood( 100 ); // continue
		}

		// Strict comparison: with "!=" a numeric 0 equals 'default' on PHP 7
		// and would silently be replaced by the configured timeout.
		if ( isset( $options['timeout'] ) && $options['timeout'] !== 'default' ) {
			$this->timeout = $options['timeout'];
		} else {
			// The timeout should always be set by HttpRequestFactory, so this
			// should only happen if the class was directly constructed
			wfDeprecated( __METHOD__ . ' without the timeout option', '1.35' );
			$this->timeout = MediaWikiServices::getInstance()->getMainConfig()->get(
				MainConfigNames::HTTPTimeout );
		}
		if ( isset( $options['connectTimeout'] ) && $options['connectTimeout'] !== 'default' ) {
			$this->connectTimeout = $options['connectTimeout'];
		} else {
			// The timeout should always be set by HttpRequestFactory, so this
			// should only happen if the class was directly constructed
			wfDeprecated( __METHOD__ . ' without the connectTimeout option', '1.35' );
			$this->connectTimeout = MediaWikiServices::getInstance()->getMainConfig()->get(
				MainConfigNames::HTTPConnectTimeout );
		}

		if ( isset( $options['userAgent'] ) ) {
			$this->setUserAgent( $options['userAgent'] );
		}
		if ( isset( $options['username'] ) && isset( $options['password'] ) ) {
			$this->setHeader(
				'Authorization',
				'Basic ' . base64_encode( $options['username'] . ':' . $options['password'] )
			);
		}
		if ( isset( $options['originalRequest'] ) ) {
			$this->setOriginalRequest( $options['originalRequest'] );
		}

		$this->setHeader( 'X-Request-Id', WebRequest::getRequestId() );

		// Options that map one-to-one onto a property of the same name
		$members = [ "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
			"method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" ];

		foreach ( $members as $o ) {
			if ( isset( $options[$o] ) ) {
				// ensure that MWHttpRequest::method is always
				// uppercased. T38137
				if ( $o === 'method' ) {
					// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
					$options[$o] = strtoupper( $options[$o] );
				}
				$this->$o = $options[$o];
			}
		}

		if ( $this->noProxy ) {
			$this->proxy = ''; // noProxy takes precedence
		}

		// Profile based on what's calling us
		$this->profiler = $profiler;
		$this->profileName = $caller;
	}

	/**
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Simple function to test if we can make any sort of requests at all,
	 * using cURL or fopen()
	 *
	 * @return bool
	 */
	public static function canMakeRequests() {
		return function_exists( 'curl_init' ) || wfIniGetBool( 'allow_url_fopen' );
	}

	/**
	 * Generate a new request object through the HttpRequestFactory service.
	 *
	 * @param string $url URL to request
	 * @param array|null $options Options for the request; null is treated as []
	 * @param string $caller Name of the calling code, passed on to the factory
	 * @return mixed Whatever HttpRequestFactory::create() returns
	 */
	public static function factory( $url, ?array $options = null, $caller = __METHOD__ ) {
		return MediaWikiServices::getInstance()->getHttpRequestFactory()
			->create( $url, $options ?? [], $caller );
	}

	/**
	 * Get the body, or content, of the response to the request.
	 *
	 * @return string
	 */
	public function getContent() {
		return $this->content;
	}

	/**
	 * Set the parameters of the request.
	 *
	 * @param array $args
	 */
	public function setData( array $args ) {
		$this->postData = $args;
	}

	/**
	 * Take care of setting up the proxy (do nothing if "noProxy" is set).
	 *
	 * An explicitly configured proxy wins. Otherwise local URLs use
	 * $wgLocalHTTPProxy as a reverse proxy (when it is not false) and all
	 * other URLs use $wgHTTPProxy.
	 */
	protected function proxySetup() {
		// If proxies are disabled, clear any other proxy
		if ( $this->noProxy ) {
			$this->proxy = '';
			return;
		}

		// If there is an explicit proxy already set, use it
		if ( $this->proxy ) {
			return;
		}

		$config = MediaWikiServices::getInstance()->getMainConfig();

		// Otherwise, fallback to $wgLocalHTTPProxy for local URLs
		// or $wgHTTPProxy for everything else
		if ( self::isLocalURL( $this->url ) ) {
			$localHTTPProxy = $config->get( MainConfigNames::LocalHTTPProxy );
			if ( $localHTTPProxy !== false ) {
				$this->setReverseProxy( $localHTTPProxy );
			}
		} else {
			$this->proxy = (string)$config->get( MainConfigNames::HTTPProxy );
		}
	}

	/**
	 * Enable use of a reverse proxy: the original hostname is sent as the
	 * "Host" header while scheme, host and port of the request URL are
	 * replaced by those of the proxy.
	 *
	 * @param string $proxy URL of the reverse proxy
	 * @throws Exception If $proxy cannot be parsed as a URL
	 */
	protected function setReverseProxy( string $proxy ) {
		$parsedProxy = wfParseUrl( $proxy );
		if ( $parsedProxy === false ) {
			throw new Exception( "Invalid reverseProxy configured: $proxy" );
		}
		// Set the current host in the Host header
		$this->setHeader( 'Host', $this->parsedUrl['host'] );
		// Replace scheme, host and port in the request
		$this->parsedUrl['scheme'] = $parsedProxy['scheme'];
		$this->parsedUrl['host'] = $parsedProxy['host'];
		if ( isset( $parsedProxy['port'] ) ) {
			$this->parsedUrl['port'] = $parsedProxy['port'];
		} else {
			unset( $this->parsedUrl['port'] );
		}
		$this->url = wfAssembleUrl( $this->parsedUrl );
		// Mark that we're already using a proxy
		$this->noProxy = true;
	}

	/**
	 * Check whether the host of the URL, or any of its parent domains, is
	 * listed in $wgLocalVirtualHosts. Always false in command line mode.
	 *
	 * @param string $url
	 * @return bool
	 */
	private static function isLocalURL( $url ) {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		$commandLineMode = $config->get( 'CommandLineMode' );
		$localVirtualHosts = $config->get( MainConfigNames::LocalVirtualHosts );
		if ( $commandLineMode ) {
			return false;
		}

		// Extract host part
		$matches = [];
		if ( !preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
			return false;
		}

		// Walk from the top-level label towards the full host name, so that
		// "org", "example.org", "www.example.org" are tested in turn.
		$domain = '';
		foreach ( array_reverse( explode( '.', $matches[1] ) ) as $i => $label ) {
			$domain = $i === 0 ? $label : $label . '.' . $domain;

			// Strict comparison: loose comparison treats numeric-looking
			// strings such as "0.1" and "0.10" as equal.
			if ( in_array( $domain, $localVirtualHosts, true ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Set the "User-Agent" request header.
	 *
	 * @param string $UA
	 */
	public function setUserAgent( $UA ) {
		$this->setHeader( 'User-Agent', $UA );
	}

	/**
	 * Set an arbitrary header.
	 *
	 * @param string $name Header name; stored as given, without case normalization
	 * @param string $value
	 */
	public function setHeader( $name, $value ) {
		// I feel like I should normalize the case here...
		$this->reqHeaders[$name] = $value;
	}

	/**
	 * Get an array of the headers, each formatted as "Name: value".
	 *
	 * When a cookie jar is present, the "Cookie" request header is
	 * (re)generated from it first.
	 *
	 * @return string[]
	 */
	protected function getHeaderList() {
		if ( $this->cookieJar ) {
			$this->reqHeaders['Cookie'] = $this->cookieJar->serializeToHttpRequest(
				// A URL such as "http://example.org" has no path component;
				// such a request is made for "/".
				$this->parsedUrl['path'] ?? '/',
				$this->parsedUrl['host']
			);
		}

		$list = [];
		foreach ( $this->reqHeaders as $name => $value ) {
			$list[] = "$name: $value";
		}

		return $list;
	}

	/**
	 * Set a read callback to accept data read from the HTTP request.
	 *
	 * @param callable|null $callback Null selects the built-in read() method
	 * @throws InvalidArgumentException If $callback is neither null nor callable
	 */
	public function setCallback( $callback ) {
		$this->doSetCallback( $callback );
	}

	/**
	 * Worker function for setting callbacks.
	 *
	 * @param callable|null $callback Null selects the built-in read() method
	 * @throws InvalidArgumentException If $callback is neither null nor callable
	 */
	protected function doSetCallback( $callback ) {
		if ( $callback === null ) {
			$callback = [ $this, 'read' ];
		} elseif ( !is_callable( $callback ) ) {
			// Record the failure on the request status before bailing out
			$this->status->fatal( 'http-internal-error' );
			throw new InvalidArgumentException( __METHOD__ . ': invalid callback' );
		}
		$this->callback = $callback;
	}

	/**
	 * A generic callback to read the body of the response from a remote
	 * server: appends the chunk to the stored content.
	 *
	 * @param mixed $fh Unused by this implementation
	 * @param string $content Chunk of response data
	 * @return int Number of bytes in $content
	 */
	public function read( $fh, $content ) {
		$this->content .= $content;
		return strlen( $content );
	}

	/**
	 * Take care of whatever is necessary to perform the URI request.
	 *
	 * @throws LogicException Always, in this base implementation
	 */
	public function execute() {
		throw new LogicException( 'children must override this' );
	}

	/**
	 * Reset the content buffer and fill in defaults before a request is made:
	 * headers-only mode for HEAD, proxy settings, the read callback and the
	 * User-Agent header.
	 */
	protected function prepare() {
		$this->content = "";

		if ( strtoupper( $this->method ) === "HEAD" ) {
			$this->headersOnly = true;
		}

		$this->proxySetup(); // set up any proxy as needed

		if ( !$this->callback ) {
			$this->doSetCallback( null );
		}

		if ( !isset( $this->reqHeaders['User-Agent'] ) ) {
			$http = MediaWikiServices::getInstance()->getHttpRequestFactory();
			$this->setUserAgent( $http->getUserAgent() );
		}
	}

	/**
	 * Parses the headers, including the HTTP status code and any
	 * Set-Cookie headers.
	 */
	protected function parseHeader() {
		$lastname = "";

		// Failure without (valid) headers gets a response status of zero
		if ( !$this->status->isOK() ) {
			$this->respStatus = '0 Error';
		}

		foreach ( $this->headerList as $header ) {
			if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
				$this->respVersion = $match[1];
				$this->respStatus = $match[2];
			} elseif ( preg_match( "#^[ \t]#", $header ) ) {
				// Folded continuation of the previous header field. Ignore it
				// when no field has been seen yet: there is nothing to append
				// to, and count() on the missing entry would be a TypeError.
				if ( isset( $this->respHeaders[$lastname] ) ) {
					$last = count( $this->respHeaders[$lastname] ) - 1;
					$this->respHeaders[$lastname][$last] .= "\r\n$header";
				}
			} elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
				$lastname = strtolower( $match[1] );
				$this->respHeaders[$lastname][] = $match[2];
			}
		}

		$this->parseCookies();
	}

	/**
	 * Parse the response headers unless parsed headers are already available.
	 */
	private function ensureHeadersParsed() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}
	}

	/**
	 * Update the request status from the response status code: codes from 1
	 * to 399 are a success, anything else marks the status as fatal with the
	 * "http-bad-status" message.
	 */
	protected function setStatus() {
		$this->ensureHeadersParsed();

		$statusCode = (int)$this->respStatus;
		if ( $statusCode > 0 && $statusCode < 400 ) {
			$this->status->setResult( true, $statusCode );
		} else {
			// The reason phrase may be missing (e.g. "HTTP/2 404"), in which
			// case explode() yields a single element; pad to keep $message set.
			[ $code, $message ] = array_pad( explode( " ", $this->respStatus, 2 ), 2, '' );
			$this->status->setResult( false, $statusCode );
			$this->status->fatal( "http-bad-status", $code, $message );
		}
	}

	/**
	 * Get the integer value of the HTTP status code (e.g. 200 for "200 Ok").
	 *
	 * @return int
	 */
	public function getStatus() {
		$this->ensureHeadersParsed();

		return (int)$this->respStatus;
	}

	/**
	 * Returns true if the last status code was a redirect.
	 *
	 * Only codes 300 to 303 count as redirects here.
	 *
	 * @return bool
	 */
	public function isRedirect() {
		$this->ensureHeadersParsed();

		$status = (int)$this->respStatus;

		return $status >= 300 && $status <= 303;
	}

	/**
	 * Returns an associative array of response headers after the request has
	 * been executed. Keys are lower-cased header names, values are lists of
	 * the values received for that header.
	 *
	 * @return string[][]
	 */
	public function getResponseHeaders() {
		$this->ensureHeadersParsed();

		return $this->respHeaders;
	}

	/**
	 * Returns the value of the given response header.
	 *
	 * @param string $header Header name, matched case-insensitively
	 * @return string|null The last value received for the header, or null if absent
	 */
	public function getResponseHeader( $header ) {
		$this->ensureHeadersParsed();

		$values = $this->respHeaders[strtolower( $header )] ?? null;
		if ( $values === null ) {
			return null;
		}

		return $values[count( $values ) - 1];
	}

	/**
	 * Tells the MWHttpRequest object to use this pre-loaded CookieJar.
	 *
	 * @param CookieJar $jar
	 */
	public function setCookieJar( CookieJar $jar ) {
		$this->cookieJar = $jar;
	}

	/**
	 * Returns the cookie jar in use.
	 *
	 * @return CookieJar
	 */
	public function getCookieJar() {
		$this->ensureHeadersParsed();

		return $this->cookieJar;
	}

	/**
	 * Sets a cookie, creating the cookie jar if there is none yet. Unless
	 * given in $attr, the cookie domain defaults to the host of the request URL.
	 *
	 * @param string $name
	 * @param string $value
	 * @param array $attr
	 */
	public function setCookie( $name, $value, array $attr = [] ) {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar();
		}

		if ( $this->parsedUrl && !isset( $attr['domain'] ) ) {
			$attr['domain'] = $this->parsedUrl['host'];
		}

		$this->cookieJar->setCookie( $name, $value, $attr );
	}

	/**
	 * Parse the cookies in the response headers and store them in the cookie jar.
	 */
	protected function parseCookies() {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar();
		}

		if ( !isset( $this->respHeaders['set-cookie'] ) ) {
			return;
		}

		$finalUrl = $this->getFinalUrl();
		$urlParts = parse_url( $finalUrl );
		if ( !isset( $urlParts['host'] ) ) {
			// Report the URL string itself, not the parse_url() result
			$this->status->fatal( 'http-invalid-url', $finalUrl );
			return;
		}

		foreach ( $this->respHeaders['set-cookie'] as $cookie ) {
			$this->cookieJar->parseCookieResponseHeader( $cookie, $urlParts['host'] );
		}
	}

	/**
	 * Build "scheme://host[:port]" from parse_url() components.
	 *
	 * @param array $parts Components including at least 'scheme' and 'host'
	 * @return string
	 */
	private static function originFromUrlParts( array $parts ) {
		$origin = $parts['scheme'] . '://' . $parts['host'];
		if ( isset( $parts['port'] ) ) {
			$origin .= ':' . $parts['port'];
		}

		return $origin;
	}

	/**
	 * Returns the final URL after all redirections.
	 *
	 * The last "Location" response header is used. When it is relative, it is
	 * prefixed with the scheme, host and port of the closest preceding
	 * absolute "Location" header, or of the request URL when there is none.
	 * Without any "Location" header the request URL is returned.
	 *
	 * @return string
	 */
	public function getFinalUrl() {
		$headers = $this->getResponseHeaders();

		// return full url (fix for incorrect but handled relative location)
		if ( isset( $headers['location'] ) ) {
			$locations = $headers['location'];
			$lastLocation = $locations[count( $locations ) - 1];
			$origin = '';
			$foundRelativeURI = false;

			// Search backwards for the most recent absolute location
			for ( $i = count( $locations ) - 1; $i >= 0; $i-- ) {
				$parts = parse_url( $locations[$i] );

				if ( isset( $parts['scheme'] ) && isset( $parts['host'] ) ) {
					$origin = self::originFromUrlParts( $parts );
					break; // found correct URI (with host)
				}
				$foundRelativeURI = true;
			}

			if ( !$foundRelativeURI ) {
				return $lastLocation;
			}
			if ( $origin !== '' ) {
				return $origin . $lastLocation;
			}

			$parts = parse_url( $this->url );
			if ( isset( $parts['scheme'] ) && isset( $parts['host'] ) ) {
				return self::originFromUrlParts( $parts ) . $lastLocation;
			}
		}

		return $this->url;
	}

	/**
	 * Returns true if the backend can follow redirects.
	 *
	 * @return bool
	 */
	public function canFollowRedirects() {
		return true;
	}

	/**
	 * Set information about the original request. The IP is sent as the
	 * "X-Forwarded-For" header and the user agent as "X-Original-User-Agent".
	 *
	 * @param WebRequest|array $originalRequest A WebRequest, or an array with
	 *   'ip' and 'userAgent' keys
	 * @throws InvalidArgumentException If $originalRequest has neither form
	 */
	public function setOriginalRequest( $originalRequest ) {
		if ( $originalRequest instanceof WebRequest ) {
			$originalRequest = [
				'ip' => $originalRequest->getIP(),
				'userAgent' => $originalRequest->getHeader( 'User-Agent' ),
			];
		} elseif (
			!is_array( $originalRequest )
			|| array_diff( [ 'ip', 'userAgent' ], array_keys( $originalRequest ) )
		) {
			throw new InvalidArgumentException( __METHOD__ . ': $originalRequest must be a '
				. "WebRequest or an array with 'ip' and 'userAgent' keys" );
		}

		$this->reqHeaders['X-Forwarded-For'] = $originalRequest['ip'];
		$this->reqHeaders['X-Original-User-Agent'] = $originalRequest['userAgent'];
	}

	/**
	 * Check that the given URI is a valid one: it must start with "http://"
	 * or "https://", followed by at least one character that is neither a
	 * slash nor whitespace, and must not contain whitespace anywhere.
	 *
	 * @param string $uri URI to check for validity
	 * @return bool
	 */
	public static function isValidURI( $uri ) {
		return (bool)preg_match(
			'/^https?:\/\/[^\/\s]\S*$/D',
			$uri
		);
	}
}
const PROTO_HTTP
Definition: Defines.php:193
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfIniGetBool( $setting)
Safety wrapper around ini_get() for boolean settings.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
wfAssembleUrl( $urlParts)
This function will reassemble a URL parsed with wfParseURL.
$matches
Cookie jar to use with MWHttpRequest.
Definition: CookieJar.php:25
setCookie( $name, $value, $attr)
Set a cookie in the cookie jar.
Definition: CookieJar.php:36
This wrapper class will call out to curl (if available) or fall back to regular PHP if necessary for h...
getContent()
Get the body, or content, of the response to the request.
setLogger(LoggerInterface $logger)
setCookie( $name, $value, array $attr=[])
Sets a cookie.
getResponseHeaders()
Returns an associative array of response headers after the request has been executed.
doSetCallback( $callback)
Worker function for setting callbacks.
setHeader( $name, $value)
Set an arbitrary header.
getCookieJar()
Returns the cookie jar in use.
setReverseProxy(string $proxy)
Enable use of a reverse proxy in which the hostname is passed as a "Host" header, and the request is ...
setOriginalRequest( $originalRequest)
Set information about the original request.
string $profileName
isRedirect()
Returns true if the last status code was a redirect.
read( $fh, $content)
A generic callback to read the body of the response from a remote server.
getFinalUrl()
Returns the final URL after all redirections.
setStatus()
Sets HTTPRequest status member to a fatal value with the error message if the returned integer value ...
parseHeader()
Parses the headers, including the HTTP status code and any Set-Cookie headers.
canFollowRedirects()
Returns true if the backend can follow redirects.
__construct( $url, array $options=[], $caller=__METHOD__, Profiler $profiler=null)
setCallback( $callback)
Set a read callback to accept data read from the HTTP request.
static canMakeRequests()
Simple function to test if we can make any sort of requests at all, using cURL or fopen()
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
static isValidURI( $uri)
Check that the given URI is a valid one.
getStatus()
Get the integer value of the HTTP status code (e.g. 200 for "200 Ok").
StatusValue $status
const SUPPORTS_FILE_POSTS
CookieJar $cookieJar
string[][] $respHeaders
execute()
Take care of whatever is necessary to perform the URI request.
getResponseHeader( $header)
Returns the value of the given response header.
LoggerInterface $logger
proxySetup()
Take care of setting up the proxy (do nothing if "noProxy" is set)
setData(array $args)
Set the parameters of the request.
parseCookies()
Parse the cookies in the response headers and store them in the cookie jar.
int|string $timeout
getHeaderList()
Get an array of the headers.
callable $callback
Profiler $profiler
setCookieJar(CookieJar $jar)
Tells the MWHttpRequest object to use this pre-loaded CookieJar.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Profiler base class that defines the interface and some shared functionality.
Definition: Profiler.php:36
static newFatal( $message,... $parameters)
Factory function for fatal errors.
Definition: StatusValue.php:73
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:85
The WebRequest class encapsulates getting at data passed in the URL or via a POSTed form stripping il...
Definition: WebRequest.php:47
static getRequestId()
Get the current request ID.
Definition: WebRequest.php:344
if( $line===false) $args
Definition: mcc.php:124
$header