MediaWiki  master
MWHttpRequest.php
Go to the documentation of this file.
1 <?php
23 use Psr\Log\LoggerAwareInterface;
24 use Psr\Log\LoggerInterface;
25 use Psr\Log\NullLogger;
26 
34 abstract class MWHttpRequest implements LoggerAwareInterface {
/** NOTE(review): name suggests this flags file-upload support in POST data; not used in this listing — confirm */
35  public const SUPPORTS_FILE_POSTS = false;
36 
/** @var int|string Request timeout; 'default' until the constructor replaces it */
40  protected $timeout = 'default';
41 
/** @var string|null Response body; reset to "" by prepare() and appended to by read() */
42  protected $content;
/** @var bool|null Set to true by prepare() when the method is HEAD */
43  protected $headersOnly = null;
/** @var mixed Request data; assigned by setData() or the 'postData' option */
44  protected $postData = null;
/** @var string|null Proxy to use; '' when proxying is disabled (see proxySetup()) */
45  protected $proxy = null;
/** @var bool When true, proxySetup() clears $proxy; also set by setReverseProxy() */
46  protected $noProxy = false;
/** @var bool Filled from the 'sslVerifyHost' option; consumed outside this listing */
47  protected $sslVerifyHost = true;
/** @var bool Filled from the 'sslVerifyCert' option; consumed outside this listing */
48  protected $sslVerifyCert = true;
/** @var mixed Filled from the 'caInfo' option; consumed outside this listing */
49  protected $caInfo = null;
/** @var string HTTP method; the constructor upper-cases the 'method' option */
50  protected $method = "GET";
/** @var array Request headers, keyed by header name (see setHeader()) */
52  protected $reqHeaders = [];
/** @var string Fully-qualified request URL (result of wfExpandUrl()) */
53  protected $url;
/** @var array|false Result of wfParseUrl() on $url */
54  protected $parsedUrl;
/** @var callable Callback receiving response data (see doSetCallback()) */
56  protected $callback;
/** @var int Filled from the 'maxRedirects' option; consumed outside this listing */
57  protected $maxRedirects = 5;
/** @var bool Filled from the 'followRedirects' option; consumed outside this listing */
58  protected $followRedirects = false;
/** @var mixed Connection timeout taken from the 'connectTimeout' option or the HTTPConnectTimeout config */
59  protected $connectTimeout;
60 
/** @var CookieJar Cookie jar used for request and response cookies */
64  protected $cookieJar;
65 
/** @var array Raw response header lines, consumed by parseHeader() */
66  protected $headerList = [];
/** @var string HTTP version taken from the response status line */
67  protected $respVersion = "0.9";
/** @var string Status code and reason phrase taken from the response status line */
68  protected $respStatus = "200 Ok";
/** @var string[][] Response headers: lower-cased name => list of values */
70  protected $respHeaders = [];
71 
/** @var StatusValue Outcome of the request */
73  protected $status;
74 
/** @var Profiler Profiler passed to the constructor (may be null) */
78  protected $profiler;
79 
/** @var string Caller name passed to the constructor, kept for profiling */
83  protected $profileName;
84 
/** @var LoggerInterface Logger; a NullLogger unless one is supplied */
88  protected $logger;
89 
/**
 * Build a request for the given URL and apply the supplied options.
 *
 * @param string $url URL to request; expanded to a fully-qualified URL with wfExpandUrl()
 * @param array $options Request options. Keys read here: logger, timeout, connectTimeout,
 *   userAgent, username, password, originalRequest, postData, proxy, noProxy,
 *   sslVerifyHost, caInfo, method, followRedirects, maxRedirects, sslVerifyCert, callback
 * @param string $caller Name of the caller, stored for profiling
 * @param Profiler|null $profiler Profiler to store, if any
 */
public function __construct(
	$url, array $options = [], $caller = __METHOD__, ?Profiler $profiler = null
) {
	$this->url = wfExpandUrl( $url, PROTO_HTTP );
	$this->parsedUrl = wfParseUrl( $this->url );

	$this->logger = $options['logger'] ?? new NullLogger();

	if ( !$this->parsedUrl || !self::isValidURI( $this->url ) ) {
		$this->status = StatusValue::newFatal( 'http-invalid-url', $url );
	} else {
		$this->status = StatusValue::newGood( 100 ); // continue
	}

	// Strict comparison: with a loose "!=", a numeric 0 equals 'default' on PHP 7
	// and would wrongly take the deprecated fallback branch.
	if ( isset( $options['timeout'] ) && $options['timeout'] !== 'default' ) {
		$this->timeout = $options['timeout'];
	} else {
		// The timeout should always be set by HttpRequestFactory, so this
		// should only happen if the class was directly constructed
		wfDeprecated( __METHOD__ . ' without the timeout option', '1.35' );
		$httpTimeout = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::HTTPTimeout );
		$this->timeout = $httpTimeout;
	}
	if ( isset( $options['connectTimeout'] ) && $options['connectTimeout'] !== 'default' ) {
		$this->connectTimeout = $options['connectTimeout'];
	} else {
		// The timeout should always be set by HttpRequestFactory, so this
		// should only happen if the class was directly constructed
		wfDeprecated( __METHOD__ . ' without the connectTimeout option', '1.35' );
		$httpConnectTimeout = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::HTTPConnectTimeout );
		$this->connectTimeout = $httpConnectTimeout;
	}
	if ( isset( $options['userAgent'] ) ) {
		$this->setUserAgent( $options['userAgent'] );
	}
	if ( isset( $options['username'] ) && isset( $options['password'] ) ) {
		// HTTP Basic authentication header
		$this->setHeader(
			'Authorization',
			'Basic ' . base64_encode( $options['username'] . ':' . $options['password'] )
		);
	}
	if ( isset( $options['originalRequest'] ) ) {
		$this->setOriginalRequest( $options['originalRequest'] );
	}

	$this->setHeader( 'X-Request-Id', WebRequest::getRequestId() );

	// Options that are copied verbatim onto the property of the same name
	$members = [ "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
		"method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" ];

	foreach ( $members as $o ) {
		if ( isset( $options[$o] ) ) {
			// ensure that MWHttpRequest::method is always
			// uppercased. T38137
			if ( $o == 'method' ) {
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				$options[$o] = strtoupper( $options[$o] );
			}
			$this->$o = $options[$o];
		}
	}

	if ( $this->noProxy ) {
		$this->proxy = ''; // noProxy takes precedence
	}

	// Profile based on what's calling us
	$this->profiler = $profiler;
	$this->profileName = $caller;
}
171 
/**
 * Replace the logger used by this request.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
178 
/**
 * Simple function to test if we can make any sort of requests at all,
 * using cURL or fopen().
 *
 * @return bool
 */
public static function canMakeRequests() {
	if ( function_exists( 'curl_init' ) ) {
		return true;
	}

	return (bool)wfIniGetBool( 'allow_url_fopen' );
}
187 
/**
 * Generate a new request object.
 *
 * Deprecated since 1.34; this only forwards to the HttpRequestFactory service.
 *
 * @param string $url URL to request
 * @param array|null $options Request options; null is treated as an empty array
 * @param string $caller Name of the caller, passed through to the factory
 * @return MWHttpRequest NOTE(review): type assumed from HttpRequestFactory::create() — confirm
 */
public static function factory( $url, ?array $options = null, $caller = __METHOD__ ) {
	wfDeprecated( __METHOD__, '1.34' );
	return MediaWikiServices::getInstance()->getHttpRequestFactory()
		->create( $url, $options ?? [], $caller );
}
203 
/**
 * Get the body, or content, of the response to the request.
 *
 * @return string|null Null until prepare() has initialised the buffer
 */
public function getContent() {
	return $this->content;
}
212 
/**
 * Set the parameters of the request.
 *
 * @param array $args Stored as the request's post data
 */
public function setData( array $args ) {
	$this->postData = $args;
}
222 
/**
 * Take care of setting up the proxy (does nothing beyond clearing the
 * proxy if "noProxy" is set).
 *
 * Precedence: noProxy, then an explicitly configured proxy, then the
 * LocalHTTPProxy setting for local URLs or the HTTPProxy setting otherwise.
 */
protected function proxySetup() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$httpProxy = $config->get( MainConfigNames::HTTPProxy );
	$localHTTPProxy = $config->get( MainConfigNames::LocalHTTPProxy );

	// Proxies disabled: drop whatever proxy was set
	if ( $this->noProxy ) {
		$this->proxy = '';
		return;
	}

	// An explicit proxy always wins
	if ( $this->proxy ) {
		return;
	}

	// Non-local URLs go through the general HTTP proxy
	if ( !self::isLocalURL( $this->url ) ) {
		$this->proxy = (string)$httpProxy;
		return;
	}

	// Local URLs go through the local reverse proxy, when one is configured
	if ( $localHTTPProxy !== false ) {
		$this->setReverseProxy( $localHTTPProxy );
	}
}
254 
/**
 * Enable use of a reverse proxy: the original hostname is sent in the
 * "Host" header while the request itself is addressed to the proxy.
 *
 * @param string $proxy URL of the reverse proxy
 * @throws Exception If $proxy cannot be parsed
 */
protected function setReverseProxy( string $proxy ) {
	$target = wfParseUrl( $proxy );
	if ( $target === false ) {
		throw new Exception( "Invalid reverseProxy configured: $proxy" );
	}

	// Remember the real host before it is overwritten below
	$this->setHeader( 'Host', $this->parsedUrl['host'] );

	// Point scheme, host and port at the proxy
	$this->parsedUrl['scheme'] = $target['scheme'];
	$this->parsedUrl['host'] = $target['host'];
	if ( !isset( $target['port'] ) ) {
		unset( $this->parsedUrl['port'] );
	} else {
		$this->parsedUrl['port'] = $target['port'];
	}
	$this->url = wfAssembleUrl( $this->parsedUrl );

	// We are already going through a proxy, so block any further proxy setup
	$this->noProxy = true;
}
284 
/**
 * Check whether the URL's host, or any of its parent domains, is listed
 * in the LocalVirtualHosts setting.
 *
 * Always false in command-line mode.
 *
 * @param string $url Fully-qualified URL
 * @return bool
 */
private static function isLocalURL( $url ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	if ( $config->get( 'CommandLineMode' ) ) {
		return false;
	}
	$localVirtualHosts = $config->get( MainConfigNames::LocalVirtualHosts );

	// Extract the host part. The host may be followed by a port, path, query or
	// fragment, or by nothing at all ("http://host" must still be recognised).
	$matches = [];
	if ( !preg_match( '!^https?://([\w.-]+)(?:[/:?#].*)?$!', $url, $matches ) ) {
		return false;
	}

	// Walk from the top-level label down, testing every suffix of the host:
	// "org", "example.org", "www.example.org", ...
	$domain = '';
	foreach ( array_reverse( explode( '.', $matches[1] ) ) as $i => $label ) {
		$domain = $i === 0 ? $label : $label . '.' . $domain;

		// Strict comparison so numeric-looking names are never matched loosely
		if ( in_array( $domain, $localVirtualHosts, true ) ) {
			return true;
		}
	}

	return false;
}
326 
/**
 * Set the User-Agent request header.
 *
 * @param string $UA
 */
public function setUserAgent( $UA ) {
	$this->setHeader( 'User-Agent', $UA );
}
333 
/**
 * Set an arbitrary header.
 *
 * The name is stored exactly as given (no case normalisation), so the same
 * header written with different capitalisation yields separate entries.
 *
 * @param string $name
 * @param string $value
 */
public function setHeader( $name, $value ) {
	$this->reqHeaders[$name] = $value;
}
343 
/**
 * Get an array of the headers.
 *
 * When a cookie jar is present, the Cookie request header is (re)built
 * from it first.
 *
 * @return string[] One "Name: value" string per request header
 */
protected function getHeaderList() {
	if ( $this->cookieJar ) {
		$cookieHeader = $this->cookieJar->serializeToHttpRequest(
			$this->parsedUrl['path'],
			$this->parsedUrl['host']
		);
		$this->reqHeaders['Cookie'] = $cookieHeader;
	}

	$lines = [];
	foreach ( $this->reqHeaders as $headerName => $headerValue ) {
		$lines[] = $headerName . ': ' . $headerValue;
	}

	return $lines;
}
365 
/**
 * Set a read callback to accept data read from the HTTP request.
 *
 * @param callable|null $callback Null selects the built-in read() method
 * @throws InvalidArgumentException If $callback is neither null nor callable
 */
public function setCallback( $callback ) {
	$this->doSetCallback( $callback );
}
387 
/**
 * Worker function for setting callbacks.
 *
 * @param callable|null $callback Null selects the built-in read() method
 * @throws InvalidArgumentException If $callback is neither null nor callable
 */
protected function doSetCallback( $callback ) {
	if ( $callback !== null && !is_callable( $callback ) ) {
		// Record the failure on the status before bailing out
		$this->status->fatal( 'http-internal-error' );
		throw new InvalidArgumentException( __METHOD__ . ': invalid callback' );
	}

	$this->callback = $callback ?? [ $this, 'read' ];
}
404 
/**
 * A generic callback to read the body of the response from a remote server.
 *
 * @param mixed $fh Not used by this implementation
 * @param string $content Chunk of response data to append to the buffer
 * @return int Byte length of $content
 */
public function read( $fh, $content ) {
	$this->content .= $content;

	return strlen( $content );
}
418 
/**
 * Take care of whatever is necessary to perform the URI request.
 *
 * This base implementation always throws; children must override it.
 *
 * @throws LogicException
 */
public function execute() {
	throw new LogicException( 'children must override this' );
}
428 
/**
 * Reset the response buffer and fill in defaults before a request is made:
 * HEAD handling, proxy selection, read callback and User-Agent header.
 */
protected function prepare() {
	$this->content = "";

	if ( strtoupper( $this->method ) === 'HEAD' ) {
		$this->headersOnly = true;
	}

	// Pick the proxy (if any) for this request
	$this->proxySetup();

	// Fall back to the built-in read() callback
	if ( !$this->callback ) {
		$this->doSetCallback( null );
	}

	if ( isset( $this->reqHeaders['User-Agent'] ) ) {
		return;
	}

	// No User-Agent supplied: use the factory's one
	$requestFactory = MediaWikiServices::getInstance()->getHttpRequestFactory();
	$this->setUserAgent( $requestFactory->getUserAgent() );
}
447 
/**
 * Parses the headers, including the HTTP status code and any Set-Cookie headers.
 *
 * Reads the raw lines in $this->headerList and fills $this->respVersion,
 * $this->respStatus and $this->respHeaders (lower-cased name => list of values).
 */
protected function parseHeader() {
	$lastname = "";

	// Failure without (valid) headers gets a response status of zero
	if ( !$this->status->isOK() ) {
		$this->respStatus = '0 Error';
	}

	foreach ( $this->headerList as $header ) {
		if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
			// Status line
			$this->respVersion = $match[1];
			$this->respStatus = $match[2];
		} elseif ( preg_match( "#^[ \t]#", $header ) ) {
			// Folded continuation of the previous header. Skip it when no header
			// has been seen yet, instead of calling count() on an undefined index.
			if ( !isset( $this->respHeaders[$lastname] ) ) {
				continue;
			}
			$last = count( $this->respHeaders[$lastname] ) - 1;
			$this->respHeaders[$lastname][$last] .= "\r\n$header";
		} elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
			// Ordinary "Name: value" line
			$lastname = strtolower( $match[1] );
			$this->respHeaders[$lastname][] = $match[2];
		}
	}

	$this->parseCookies();
}
476 
/**
 * Sets the request's status from the HTTP response status: a status code
 * from 1 to 399 marks the result as good, anything else adds a fatal
 * "http-bad-status" error carrying the code and reason phrase.
 */
protected function setStatus() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$statusCode = (int)$this->respStatus;

	if ( $statusCode > 0 && $statusCode < 400 ) {
		$this->status->setResult( true, $statusCode );
	} else {
		// The reason phrase may be missing (e.g. "404"), so pad the split
		// result rather than destructuring an undefined second element.
		[ $code, $message ] = array_pad( explode( " ", $this->respStatus, 2 ), 2, '' );
		$this->status->setResult( false, $statusCode );
		$this->status->fatal( "http-bad-status", $code, $message );
	}
}
497 
/**
 * Get the integer value of the HTTP status code.
 *
 * Parses the response headers first if that has not happened yet.
 *
 * @return int
 */
public function getStatus() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$statusCode = (int)$this->respStatus;

	return $statusCode;
}
512 
/**
 * Returns true if the last status code was a redirect.
 *
 * Only status codes 300 through 303 are treated as redirects here.
 *
 * @return bool
 */
public function isRedirect() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$code = (int)$this->respStatus;

	return $code >= 300 && $code <= 303;
}
531 
/**
 * Returns an associative array of response headers after the request has
 * been executed.
 *
 * @return string[][] Lower-cased header name => list of values
 */
public function getResponseHeaders() {
	if ( !$this->respHeaders ) {
		// Not parsed yet (or nothing was found last time)
		$this->parseHeader();
	}

	return $this->respHeaders;
}
548 
/**
 * Returns the value of the given response header.
 *
 * The lookup is case-insensitive; when the header occurred several times,
 * the last value is returned.
 *
 * @param string $header
 * @return string|null Null when the header is not present
 */
public function getResponseHeader( $header ) {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$key = strtolower( $header );
	if ( !isset( $this->respHeaders[$key] ) ) {
		return null;
	}

	$values = $this->respHeaders[$key];

	return $values[count( $values ) - 1];
}
567 
/**
 * Tells the MWHttpRequest object to use this pre-loaded CookieJar.
 *
 * @param CookieJar $jar
 */
public function setCookieJar( CookieJar $jar ) {
	$this->cookieJar = $jar;
}
578 
/**
 * Returns the cookie jar in use.
 *
 * Response headers are parsed first when that has not happened yet, which
 * also loads any Set-Cookie headers into the jar.
 *
 * @return CookieJar
 */
public function getCookieJar() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	return $this->cookieJar;
}
591 
/**
 * Sets a cookie, creating the cookie jar on first use.
 *
 * @param string $name
 * @param string $value
 * @param array $attr Cookie attributes; 'domain' defaults to the request
 *   URL's host when the URL could be parsed
 */
public function setCookie( $name, $value, array $attr = [] ) {
	if ( !$this->cookieJar ) {
		$this->cookieJar = new CookieJar;
	}

	$needsDomain = !isset( $attr['domain'] );
	if ( $this->parsedUrl && $needsDomain ) {
		$attr['domain'] = $this->parsedUrl['host'];
	}

	$this->cookieJar->setCookie( $name, $value, $attr );
}
612 
/**
 * Parse the cookies in the response headers and store them in the cookie jar.
 *
 * A cookie jar is created if there is none yet. Cookies are attributed to the
 * host of the final URL; if that URL has no host, a fatal "http-invalid-url"
 * error is added to the status instead.
 */
protected function parseCookies() {
	if ( !$this->cookieJar ) {
		$this->cookieJar = new CookieJar;
	}

	if ( !isset( $this->respHeaders['set-cookie'] ) ) {
		return;
	}

	$finalUrl = $this->getFinalUrl();
	$url = parse_url( $finalUrl );
	if ( !isset( $url['host'] ) ) {
		// Report the URL string itself, not the parse_url() result (array or false)
		$this->status->fatal( 'http-invalid-url', $finalUrl );
		return;
	}

	foreach ( $this->respHeaders['set-cookie'] as $cookie ) {
		$this->cookieJar->parseCookieResponseHeader( $cookie, $url['host'] );
	}
}
632 
/**
 * Returns the final URL after all redirections.
 *
 * Uses the last Location response header. If that value (or a later one)
 * is relative, it is prefixed with the scheme and host of the most recent
 * absolute Location header, or failing that, of the request URL. Without
 * any Location header the request URL is returned.
 *
 * @return string
 */
public function getFinalUrl() {
	$headers = $this->getResponseHeaders();

	if ( !isset( $headers['location'] ) ) {
		return $this->url;
	}

	$locations = $headers['location'];
	$lastIndex = count( $locations ) - 1;
	$origin = '';
	$sawRelative = false;

	// Scan backwards for the most recent absolute Location header
	for ( $idx = $lastIndex; $idx >= 0; $idx-- ) {
		$parts = parse_url( $locations[$idx] );

		if ( isset( $parts['scheme'], $parts['host'] ) ) {
			$origin = $parts['scheme'] . '://' . $parts['host'];
			break;
		}

		$sawRelative = true;
	}

	// The last Location was already absolute
	if ( !$sawRelative ) {
		return $locations[$lastIndex];
	}

	// Relative: resolve against the latest absolute redirect target
	if ( $origin ) {
		return $origin . $locations[$lastIndex];
	}

	// Relative with no absolute redirect: resolve against the request URL
	$requested = parse_url( $this->url );
	if ( isset( $requested['scheme'], $requested['host'] ) ) {
		return $requested['scheme'] . '://' . $requested['host'] . $locations[$lastIndex];
	}

	return $this->url;
}
685 
/**
 * Returns true if the backend can follow redirects.
 *
 * This base implementation always reports true.
 *
 * @return bool
 */
public function canFollowRedirects() {
	return true;
}
694 
/**
 * Set information about the original request, sent along as the
 * X-Forwarded-For and X-Original-User-Agent request headers.
 *
 * @param WebRequest|array $originalRequest A WebRequest, or an array with
 *   'ip' and 'userAgent' keys
 * @throws InvalidArgumentException If $originalRequest has neither form
 */
public function setOriginalRequest( $originalRequest ) {
	if ( $originalRequest instanceof WebRequest ) {
		$ip = $originalRequest->getIP();
		$userAgent = $originalRequest->getHeader( 'User-Agent' );
	} else {
		$isUsable = is_array( $originalRequest )
			&& !array_diff( [ 'ip', 'userAgent' ], array_keys( $originalRequest ) );
		if ( !$isUsable ) {
			throw new InvalidArgumentException( __METHOD__ . ': $originalRequest must be a '
				. "WebRequest or an array with 'ip' and 'userAgent' keys" );
		}
		$ip = $originalRequest['ip'];
		$userAgent = $originalRequest['userAgent'];
	}

	$this->reqHeaders['X-Forwarded-For'] = $ip;
	$this->reqHeaders['X-Original-User-Agent'] = $userAgent;
}
724 
/**
 * Check that the given URI is a valid one.
 *
 * Accepts only http:// or https:// URIs whose authority starts with a
 * character other than "/" or whitespace, and which contain no whitespace.
 *
 * @param string $uri
 * @return bool
 */
public static function isValidURI( $uri ) {
	$pattern = '/^https?:\/\/[^\/\s]\S*$/D';

	return preg_match( $pattern, $uri ) === 1;
}
747 }
const PROTO_HTTP
Definition: Defines.php:193
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
wfIniGetBool( $setting)
Safety wrapper around ini_get() for boolean settings.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
wfAssembleUrl( $urlParts)
This function will reassemble a URL parsed with wfParseURL.
$matches
Cookie jar to use with MWHttpRequest.
Definition: CookieJar.php:25
setCookie( $name, $value, $attr)
Set a cookie in the cookie jar.
Definition: CookieJar.php:36
This wrapper class will call out to curl (if available) or fallback to regular PHP if necessary for h...
getContent()
Get the body, or content, of the response to the request.
setLogger(LoggerInterface $logger)
setCookie( $name, $value, array $attr=[])
Sets a cookie.
getResponseHeaders()
Returns an associative array of response headers after the request has been executed.
doSetCallback( $callback)
Worker function for setting callbacks.
setHeader( $name, $value)
Set an arbitrary header.
getCookieJar()
Returns the cookie jar in use.
setReverseProxy(string $proxy)
Enable use of a reverse proxy in which the hostname is passed as a "Host" header, and the request is ...
setOriginalRequest( $originalRequest)
Set information about the original request.
string $profileName
isRedirect()
Returns true if the last status code was a redirect.
read( $fh, $content)
A generic callback to read the body of the response from a remote server.
getFinalUrl()
Returns the final URL after all redirections.
setStatus()
Sets HTTPRequest status member to a fatal value with the error message if the returned integer value ...
parseHeader()
Parses the headers, including the HTTP status code and any Set-Cookie headers.
canFollowRedirects()
Returns true if the backend can follow redirects.
__construct( $url, array $options=[], $caller=__METHOD__, Profiler $profiler=null)
setCallback( $callback)
Set a read callback to accept data read from the HTTP request.
static canMakeRequests()
Simple function to test if we can make any sort of requests at all, using cURL or fopen()
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
static isValidURI( $uri)
Check that the given URI is a valid one.
getStatus()
Get the integer value of the HTTP status code (e.g.
StatusValue $status
const SUPPORTS_FILE_POSTS
CookieJar $cookieJar
string[][] $respHeaders
execute()
Take care of whatever is necessary to perform the URI request.
getResponseHeader( $header)
Returns the value of the given response header.
LoggerInterface $logger
proxySetup()
Take care of setting up the proxy (do nothing if "noProxy" is set)
setData(array $args)
Set the parameters of the request.
parseCookies()
Parse the cookies in the response headers and store them in the cookie jar.
int|string $timeout
getHeaderList()
Get an array of the headers.
callable $callback
Profiler $profiler
setCookieJar(CookieJar $jar)
Tells the MWHttpRequest object to use this pre-loaded CookieJar.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Profiler base class that defines the interface and some shared functionality.
Definition: Profiler.php:37
static newFatal( $message,... $parameters)
Factory function for fatal errors.
Definition: StatusValue.php:73
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:85
The WebRequest class encapsulates getting at data passed in the URL or via a POSTed form stripping il...
Definition: WebRequest.php:49
static getRequestId()
Get the current request ID.
Definition: WebRequest.php:344
$header