123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421 |
- <?php
- /**
- * StatusNet, the distributed open-source microblogging tool
- *
- * Utility for doing HTTP-related things
- *
- * PHP version 5
- *
- * LICENCE: This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * @category Action
- * @package StatusNet
- * @author Evan Prodromou <evan@status.net>
- * @copyright 2009 StatusNet, Inc.
- * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
- * @link http://status.net/
- */
// Refuse to run when this file is requested directly instead of being
// loaded by the application, which defines GNUSOCIAL during bootstrap.
defined('GNUSOCIAL') || exit(1);
/**
 * Useful structure for HTTP responses
 *
 * We make HTTP calls in several places, and we have several different
 * ways of doing them. This class hides the specifics of what underlying
 * library (curl or PHP-HTTP or whatever) that's used.
 *
 * This extends the HTTP_Request2_Response class with methods to get info
 * about any followed redirects.
 *
 * Originally used the name 'HTTPResponse' to match earlier code, but
 * this conflicts with a class in the PECL HTTP extension.
 *
 * The constructor copies arbitrary state from the wrapped response, so the
 * class is explicitly allowed to carry dynamic properties (the attribute
 * line below is parsed as a plain comment by PHP versions before 8.0).
 *
 * @category HTTP
 * @package  StatusNet
 * @author   Evan Prodromou <evan@status.net>
 * @author   Brion Vibber <brion@status.net>
 * @license  http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
 * @link     http://status.net/
 */
#[\AllowDynamicProperties]
class GNUsocial_HTTPResponse extends HTTP_Request2_Response
{
    /**
     * URL passed to the constructor, stored as a string.
     *
     * Public because it used to be created dynamically (and was therefore
     * publicly readable); declared so PHP 8.2+ raises no deprecation.
     *
     * @var string|null
     */
    public $url;

    /**
     * Redirect count passed to the constructor, stored as an integer.
     *
     * @var int|null
     */
    public $redirectCount;

    /**
     * List of URLs recorded by the client while performing the request.
     * Stays null unless the wrapped response (or the caller) provides it.
     *
     * @var array|null
     */
    public $redirUrls;

    /**
     * Wrap an existing response, cloning its state into this object.
     *
     * The parent constructor is intentionally not called: every property of
     * $response that is visible from this scope is copied over instead.
     *
     * @param HTTP_Request2_Response $response  response to copy state from
     * @param mixed                  $url       URL to report from getUrl(); cast to string
     * @param mixed                  $redirects redirect count; cast to int
     */
    public function __construct(HTTP_Request2_Response $response, $url, $redirects = 0)
    {
        foreach (get_object_vars($response) as $key => $val) {
            $this->$key = $val;
        }
        $this->url = strval($url);
        $this->redirectCount = intval($redirects);
    }

    /**
     * Get the count of redirects that have been followed, if any.
     *
     * @return int
     */
    public function getRedirectCount()
    {
        return $this->redirectCount;
    }

    /**
     * Gets the target URL, before any redirects. Use getEffectiveUrl() for final target.
     *
     * @return string URL
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Check if the response is OK, generally a 200 or other 2xx status code.
     *
     * @return bool
     */
    public function isOk()
    {
        $status = $this->getStatus();
        return ($status >= 200 && $status < 300);
    }
}
/**
 * Utility class for doing HTTP client stuff
 *
 * We make HTTP calls in several places, and we have several different
 * ways of doing them. This class hides the specifics of what underlying
 * library (curl or PHP-HTTP or whatever) that's used.
 *
 * This extends the PEAR HTTP_Request2 package:
 * - sends StatusNet-specific User-Agent header
 * - 'follow_redirects' config option, defaulting on
 * - 'max_redirs' config option, defaulting to 10
 * - extended response class adds getRedirectCount() and getUrl() methods
 * - get() and post() convenience functions return body content directly
 *
 * @category HTTP
 * @package  StatusNet
 * @author   Evan Prodromou <evan@status.net>
 * @author   Brion Vibber <brion@status.net>
 * @license  http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
 * @link     http://status.net/
 */
class HTTPClient extends HTTP_Request2
{
    /**
     * Build a client whose defaults come from the site's 'http' settings.
     *
     * Values passed in $config are handed to the parent constructor after
     * the defaults below have been written to $this->config.
     *
     * @param mixed  $url    request URL, passed through to HTTP_Request2
     * @param string $method HTTP method, defaults to GET
     * @param array  $config extra HTTP_Request2 configuration
     */
    public function __construct($url = null, $method = self::METHOD_GET, $config = array())
    {
        $timeout = common_config('http', 'timeout');
        if (is_int($timeout)) {
            // Reasonably you shouldn't set http/timeout to 0 because of
            // malicious remote servers that can cause infinitely long
            // responses... But the default in HTTP_Request2 is 0 for
            // some reason and should probably be considered a valid value.
            $this->config['timeout'] = $timeout;
        } else {
            common_log(LOG_ERR, 'config option http/timeout is not an integer value: '._ve($timeout));
        }

        $connectTimeout = common_config('http', 'connect_timeout');
        if (!empty($connectTimeout)) {
            $this->config['connect_timeout'] = $connectTimeout;
        }

        $this->config['max_redirs'] = 10;
        $this->config['follow_redirects'] = true;

        // We've had some issues with keepalive breaking with
        // HEAD requests, such as to youtube which seems to be
        // emitting chunked encoding info for an empty body
        // instead of not emitting anything. This may be a
        // bug on YouTube's end, but the upstream library
        // ought to be investigated to see if we can handle
        // it gracefully in that case as well.
        $this->config['protocol_version'] = '1.0';

        // Default state of OpenSSL seems to have no trusted
        // SSL certificate authorities, which breaks hostname
        // verification and means we have a hard time communicating
        // with other sites' HTTPS interfaces.
        //
        // Turn off verification unless we've configured a CA bundle.
        $caFile = common_config('http', 'ssl_cafile');
        if ($caFile) {
            $this->config['ssl_cafile'] = $caFile;
        } else {
            $this->config['ssl_verify_peer'] = false;
        }

        // This means "verify the cert hostname against what we connect to", it does not
        // imply CA trust or anything like that. Just the hostname.
        $this->config['ssl_verify_host'] = common_config('http', 'ssl_verify_host');

        if (common_config('http', 'curl') && extension_loaded('curl')) {
            $this->config['adapter'] = 'HTTP_Request2_Adapter_Curl';
        }

        // Copy over whichever proxy settings have a non-empty value.
        foreach (array('host', 'port', 'user', 'password', 'auth_scheme') as $cf) {
            $k = 'proxy_'.$cf;
            $v = common_config('http', $k);
            if (!empty($v)) {
                $this->config[$k] = $v;
            }
        }

        parent::__construct($url, $method, $config);
        $this->setHeader('User-Agent', self::userAgent());
    }

    /**
     * Convenience/back-compat instantiator
     *
     * @return HTTPClient
     */
    public static function start()
    {
        return new self();
    }

    /**
     * Append URL-encoded parameters to a URL's query string.
     *
     * Uses '?' when the URL has no query string yet and '&' otherwise.
     * An empty parameter list returns the URL untouched.
     *
     * @param string $url    base URL
     * @param array  $params parameters to encode with http_build_query()
     *
     * @return string
     */
    private static function appendQueryParams($url, array $params)
    {
        if (empty($params)) {
            return $url;
        }
        // The numeric prefix must be a string: passing null here is
        // deprecated as of PHP 8.1.
        $query = http_build_query($params, '', '&');
        $separator = (strpos($url, '?') === false) ? '?' : '&';
        return $url . $separator . $query;
    }

    /**
     * Quick static function to GET a URL
     *
     * @param string      $url     URL to fetch
     * @param string|null $accept  value for the Accept header, if any
     * @param array       $params  query parameters appended to $url
     * @param array       $headers extra headers, passed on to get()
     *
     * @return string response body
     * @throws Exception when the status is not 2xx (status code is the exception code)
     */
    public static function quickGet($url, $accept=null, array $params=array(), array $headers=array())
    {
        $url = self::appendQueryParams($url, $params);

        $client = new self();
        if (!is_null($accept)) {
            $client->setHeader('Accept', $accept);
        }
        $response = $client->get($url, $headers);
        if (!$response->isOk()) {
            // TRANS: Exception. %s is the URL we tried to GET.
            throw new Exception(sprintf(_m('Could not GET URL %s.'), $url), $response->getStatus());
        }
        return $response->getBody();
    }

    /**
     * GET a URL and decode the response body as JSON.
     *
     * @param string $url    URL to fetch
     * @param array  $params query parameters appended to $url
     *
     * @return mixed decoded JSON data
     * @throws ServerException when json_decode() yields null
     */
    public static function quickGetJson($url, $params=array())
    {
        $data = json_decode(self::quickGet($url, null, $params));
        if (is_null($data)) {
            common_debug('Could not decode JSON data from URL: '.$url);
            throw new ServerException('Could not decode JSON data from URL');
        }
        return $data;
    }

    /**
     * Quick static function to send a HEAD request and return the headers.
     *
     * If you want an Accept header, put it in $headers
     *
     * @param string $url     URL to query
     * @param array  $params  query parameters appended to $url
     * @param array  $headers extra headers, passed on to head()
     *
     * @return mixed result of calling getHeader() on the response without arguments
     * @throws Exception when the status is not 2xx (status code is the exception code)
     */
    public static function quickHead($url, array $params=array(), array $headers=array())
    {
        $url = self::appendQueryParams($url, $params);

        $client = new self();
        $response = $client->head($url, $headers);
        if (!$response->isOk()) {
            // TRANS: Exception. %s is the URL we tried to GET.
            throw new Exception(sprintf(_m('Could not GET URL %s.'), $url), $response->getStatus());
        }
        return $response->getHeader();
    }

    /**
     * Convenience function to run a GET request.
     *
     * @param string $url     URL to fetch
     * @param array  $headers list of already formatted header strings
     *
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     */
    public function get($url, $headers=array())
    {
        return $this->doRequest($url, self::METHOD_GET, $headers);
    }

    /**
     * Convenience function to run a HEAD request.
     *
     * NOTE: Will probably turn into a GET request if you let it follow redirects!
     * That option is only there to be flexible and may be removed in the future!
     *
     * @param string $url              URL to query
     * @param array  $headers          list of already formatted header strings
     * @param bool   $follow_redirects value used for 'follow_redirects' during this call only
     *
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     */
    public function head($url, $headers=array(), $follow_redirects=false)
    {
        // Remember the configured value so it can be restored afterwards.
        $old_follow = $this->config['follow_redirects'];
        $this->config['follow_redirects'] = $follow_redirects;
        try {
            return $this->doRequest($url, self::METHOD_HEAD, $headers);
        } finally {
            // Runs on both return and exception; any exception keeps propagating.
            $this->config['follow_redirects'] = $old_follow;
        }
    }

    /**
     * Convenience function to POST form data.
     *
     * @param string $url
     * @param array  $headers optional list of already formatted header strings
     * @param array  $data    optional associative array or blob of form data to submit
     *
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     */
    public function post($url, $headers=array(), $data=array())
    {
        if ($data) {
            $this->addPostParameter($data);
        }
        return $this->doRequest($url, self::METHOD_POST, $headers);
    }

    /**
     * Configure URL, method and headers on this request, then send it.
     *
     * @param string $url     The URL including possible querystring
     * @param string $method  The HTTP method to use
     * @param array  $headers List of already formatted strings
     *                        (not an associative array, to allow
     *                        multiple same-named headers)
     *
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     * @throws NoHttpResponseException when send() returns null
     */
    protected function doRequest($url, $method, array $headers=array())
    {
        $this->setUrl($url);

        // Workaround for HTTP_Request2 not setting up SNI in socket contexts;
        // This fixes cert validation for SSL virtual hosts using SNI.
        // Requires PHP 5.3.2 or later and OpenSSL with SNI support.
        if ($this->url->getScheme() === 'https' && defined('OPENSSL_TLSEXT_SERVER_NAME')) {
            $this->config['ssl_SNI_enabled'] = true;
            $this->config['ssl_SNI_server_name'] = $this->url->getHost();
        }

        $this->setMethod($method);
        foreach ($headers as $header) {
            $this->setHeader($header);
        }

        $response = $this->send();
        if (is_null($response)) {
            // TRANS: Failed to retrieve a remote web resource, %s is the target URL.
            throw new NoHttpResponseException($url);
        }
        return $response;
    }

    /**
     * Write a log line prefixed with this request's method and URL.
     *
     * @param int    $level  log priority handed to common_log()
     * @param string $detail message to append
     *
     * @return void
     */
    protected function log($level, $detail)
    {
        $method = $this->getMethod();
        $url = $this->getUrl();
        common_log($level, __CLASS__ . ": HTTP $method $url - $detail");
    }

    /**
     * Pulls up GNU Social's customized user-agent string, so services
     * we hit can track down the responsible software.
     *
     * @return string
     */
    public static function userAgent()
    {
        return GNUSOCIAL_ENGINE . '/' . GNUSOCIAL_VERSION
            . ' (' . GNUSOCIAL_CODENAME . ')';
    }

    /**
     * Actually performs the HTTP request and returns a
     * GNUsocial_HTTPResponse object with response body and header info.
     *
     * Wraps around parent send() to add logging and redirection processing.
     *
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     */
    public function send()
    {
        $maxRedirs = intval($this->config['max_redirs']);
        if (empty($this->config['max_redirs'])) {
            $maxRedirs = 0;
        }
        $redirs = 0;
        $redirUrls = array();

        do {
            try {
                $response = parent::send();
            } catch (Exception $e) {
                $this->log(LOG_ERR, $e->getMessage());
                throw $e;
            }

            $code = $response->getStatus();
            // Record the effective URL of every response received so far.
            $redirUrls[] = $response->getEffectiveUrl();

            if ($code >= 200 && $code < 300) {
                $reason = $response->getReasonPhrase();
                $this->log(LOG_INFO, "$code $reason");
            } elseif ($code >= 300 && $code < 400) {
                $url = $this->getUrl();
                $target = $response->getHeader('Location');

                // NOTE(review): the counter is bumped before the comparison, so
                // the redirect that reaches the limit is counted but not
                // followed - confirm this is the intended meaning of max_redirs.
                if (++$redirs >= $maxRedirs) {
                    common_log(LOG_ERR, __CLASS__ . ": Too many redirects: skipping $code redirect from $url to $target");
                    break;
                }
                try {
                    $this->setUrl($target);
                    $this->setHeader('Referer', $url);
                    common_log(LOG_INFO, __CLASS__ . ": Following $code redirect from $url to $target");
                    continue;
                } catch (HTTP_Request2_Exception $e) {
                    // An unusable target ends the loop; the 3xx response is returned as-is.
                    common_log(LOG_ERR, __CLASS__ . ": Invalid $code redirect from $url to $target");
                }
            } else {
                $reason = $response->getReasonPhrase();
                $this->log(LOG_ERR, "$code $reason");
            }
            break;
        } while ($maxRedirs);

        // Attach the URL list to the wrapper rather than to the raw response,
        // so no dynamic property is created on HTTP_Request2_Response.
        $wrapped = new GNUsocial_HTTPResponse($response, $this->getUrl(), $redirs);
        $wrapped->redirUrls = $redirUrls;
        return $wrapped;
    }

    /**
     * Determine the filename announced in a Content-Disposition header.
     *
     * When no headers are supplied, a HEAD request is sent to $url to fetch
     * them. Header names are matched case-insensitively, so caller-supplied
     * arrays may use any capitalisation. Only the quoted form
     * `...; filename="name"` at the end of the header value is recognised.
     *
     * @param string     $url     URL the headers belong to
     * @param array|null $headers header name => value map, or null to fetch them
     *
     * @return string|null the filename, or null when none could be determined
     * @throws HTTP_Request2_Exception when the HEAD request fails
     */
    public static function get_filename(string $url, ?array $headers = null) : ?string
    {
        if ($headers === null) {
            $head = (new self())->head($url);
            $headers = $head->getHeader();
        }
        // Normalise in both cases so the lookup below is case-insensitive.
        $headers = array_change_key_case($headers, CASE_LOWER);

        if (array_key_exists('content-disposition', $headers) &&
            preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'], $matches) === 1) {
            return $matches[1];
        }

        common_log(LOG_INFO, "Couldn't determine filename for url: {$url}");
        return null;
    }
}
|