http-client.scm 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2012-2018, 2020-2022 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
  4. ;;; Copyright © 2012, 2015 Free Software Foundation, Inc.
  5. ;;; Copyright © 2017 Tobias Geerinckx-Rice <me@tobias.gr>
  6. ;;;
  7. ;;; This file is part of GNU Guix.
  8. ;;;
  9. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  10. ;;; under the terms of the GNU General Public License as published by
  11. ;;; the Free Software Foundation; either version 3 of the License, or (at
  12. ;;; your option) any later version.
  13. ;;;
  14. ;;; GNU Guix is distributed in the hope that it will be useful, but
  15. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. ;;; GNU General Public License for more details.
  18. ;;;
  19. ;;; You should have received a copy of the GNU General Public License
  20. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  21. (define-module (guix http-client)
  22. #:use-module (web uri)
  23. #:use-module (web http)
  24. #:use-module ((web client) #:hide (open-socket-for-uri))
  25. #:use-module (web request)
  26. #:use-module (web response)
  27. #:use-module (srfi srfi-1)
  28. #:use-module (srfi srfi-11)
  29. #:use-module (srfi srfi-19)
  30. #:use-module (srfi srfi-26)
  31. #:use-module (srfi srfi-34)
  32. #:use-module (srfi srfi-35)
  33. #:use-module (ice-9 match)
  34. #:use-module (ice-9 binary-ports)
  35. #:use-module (rnrs bytevectors)
  36. #:use-module (guix ui)
  37. #:use-module (guix utils)
  38. #:use-module (guix base64)
  39. #:autoload (gcrypt hash) (sha256)
  40. #:autoload (gnutls) (error/invalid-session error/again error/interrupted)
  41. #:use-module ((guix build utils)
  42. #:select (mkdir-p dump-port))
  43. #:use-module ((guix build download)
  44. #:select (open-socket-for-uri
  45. (open-connection-for-uri
  46. . guix:open-connection-for-uri)
  47. resolve-uri-reference))
  48. #:re-export (open-socket-for-uri)
  49. #:export (&http-get-error
  50. http-get-error?
  51. http-get-error-uri
  52. http-get-error-code
  53. http-get-error-reason
  54. http-get-error-headers
  55. http-fetch
  56. http-multiple-get
  57. %http-cache-ttl
  58. http-fetch/cached))
  59. ;;; Commentary:
  60. ;;;
  61. ;;; HTTP client portable among Guile versions, and with proper error condition
  62. ;;; reporting.
  63. ;;;
  64. ;;; Code:
;; HTTP GET error.
;;
;; Condition type raised by 'http-fetch' when the response code is neither
;; 200 nor one of the redirection codes it follows.  Each field below is
;; paired with its accessor; the trailing comment gives the kind of value
;; stored in it.
(define-condition-type &http-get-error &error
  http-get-error?
  (uri http-get-error-uri)                        ;URI
  (code http-get-error-code)                      ;integer
  (reason http-get-error-reason)                  ;string
  (headers http-get-error-headers))               ;alist
  72. (define* (http-fetch uri #:key port (text? #f) (buffered? #t)
  73. (open-connection guix:open-connection-for-uri)
  74. (keep-alive? #f)
  75. (verify-certificate? #t)
  76. (headers '((user-agent . "GNU Guile")))
  77. (log-port (current-error-port))
  78. timeout)
  79. "Return an input port containing the data at URI, and the expected number of
  80. bytes available or #f. If TEXT? is true, the data at URI is considered to be
  81. textual. Follow any HTTP redirection. When BUFFERED? is #f, return an
  82. unbuffered port, suitable for use in `filtered-port'. HEADERS is an alist of
  83. extra HTTP headers.
  84. When KEEP-ALIVE? is true, the connection is marked as 'keep-alive' and PORT is
  85. not closed upon completion.
  86. When VERIFY-CERTIFICATE? is true, verify HTTPS server certificates.
  87. TIMEOUT specifies the timeout in seconds for connection establishment; when
  88. TIMEOUT is #f, connection establishment never times out.
  89. Write information about redirects to LOG-PORT.
  90. Raise an '&http-get-error' condition if downloading fails."
  91. (define uri*
  92. (if (string? uri) (string->uri uri) uri))
  93. (let loop ((uri uri*)
  94. (port (or port (open-connection uri*
  95. #:verify-certificate?
  96. verify-certificate?
  97. #:timeout timeout))))
  98. (let ((headers (match (uri-userinfo uri)
  99. ((? string? str)
  100. (cons (cons 'Authorization
  101. (string-append "Basic "
  102. (base64-encode
  103. (string->utf8 str))))
  104. headers))
  105. (_ headers))))
  106. (unless (or buffered? (not (file-port? port)))
  107. (setvbuf port 'none))
  108. (let*-values (((resp data)
  109. (http-get uri #:streaming? #t #:port port
  110. #:keep-alive? keep-alive?
  111. #:headers headers))
  112. ((code)
  113. (response-code resp)))
  114. (case code
  115. ((200)
  116. (values data (response-content-length resp)))
  117. ((301 ; moved permanently
  118. 302 ; found (redirection)
  119. 303 ; see other
  120. 307 ; temporary redirection
  121. 308) ; permanent redirection
  122. (let ((host (uri-host uri))
  123. (uri (resolve-uri-reference (response-location resp) uri)))
  124. (if keep-alive?
  125. (dump-port data (%make-void-port "w0")
  126. (response-content-length resp))
  127. (close-port port))
  128. (format log-port (G_ "following redirection to `~a'...~%")
  129. (uri->string uri))
  130. (loop uri
  131. (or (and keep-alive?
  132. (or (not (uri-host uri))
  133. (string=? host (uri-host uri)))
  134. port)
  135. (open-connection uri
  136. #:verify-certificate?
  137. verify-certificate?
  138. #:timeout timeout)))))
  139. (else
  140. (raise (condition (&http-get-error
  141. (uri uri)
  142. (code code)
  143. (reason (response-reason-phrase resp))
  144. (headers (response-headers resp)))
  145. (&message
  146. (message
  147. (format
  148. #f
  149. (G_ "~a: HTTP download failed: ~a (~s)")
  150. (uri->string uri) code
  151. (response-reason-phrase resp))))))))))))
  152. (define-syntax-rule (false-if-networking-error exp)
  153. "Return #f if EXP triggers a network related exception as can occur when
  154. reusing stale cached connections."
  155. ;; FIXME: Duplicated from 'with-cached-connection'.
  156. (catch #t
  157. (lambda ()
  158. exp)
  159. (lambda (key . args)
  160. ;; If PORT was cached and the server closed the connection in the
  161. ;; meantime, we get EPIPE. In that case, open a fresh connection and
  162. ;; retry. We might also get 'bad-response or a similar exception from
  163. ;; (web response) later on, once we've sent the request, or a
  164. ;; ERROR/INVALID-SESSION from GnuTLS.
  165. (if (or (and (eq? key 'system-error)
  166. (= EPIPE (system-error-errno `(,key ,@args))))
  167. (and (eq? key 'gnutls-error)
  168. (memq (first args)
  169. (list error/invalid-session
  170. ;; XXX: These two are not properly handled in
  171. ;; GnuTLS < 3.7.2, in
  172. ;; 'write_to_session_record_port'; see
  173. ;; <https://bugs.gnu.org/47867>.
  174. error/again error/interrupted)))
  175. (memq key
  176. '(bad-response bad-header bad-header-component)))
  177. #f
  178. (apply throw key args)))))
  179. (define* (http-multiple-get base-uri proc seed requests
  180. #:key port (verify-certificate? #t)
  181. (open-connection guix:open-connection-for-uri)
  182. (keep-alive? #t)
  183. (batch-size 1000))
  184. "Send all of REQUESTS to the server at BASE-URI. Call PROC for each
  185. response, passing it the request object, the response, a port from which to
  186. read the response body, and the previous result, starting with SEED, à la
  187. 'fold'. Return the final result.
  188. When PORT is specified, use it as the initial connection on which HTTP
  189. requests are sent; otherwise call OPEN-CONNECTION to open a new connection for
  190. a URI. When KEEP-ALIVE? is false, close the connection port before
  191. returning."
  192. (let connect ((port port)
  193. (requests requests)
  194. (result seed))
  195. (define batch
  196. (if (>= batch-size (length requests))
  197. requests
  198. (take requests batch-size)))
  199. ;; (format (current-error-port) "connecting (~a requests left)..."
  200. ;; (length requests))
  201. (let ((p (or port (open-connection base-uri
  202. #:verify-certificate?
  203. verify-certificate?))))
  204. ;; For HTTPS, P is not a file port and does not support 'setvbuf'.
  205. (when (file-port? p)
  206. (setvbuf p 'block (expt 2 16)))
  207. ;; Send BATCH in a row.
  208. ;; XXX: Do our own caching to work around inefficiencies when
  209. ;; communicating over TLS: <http://bugs.gnu.org/22966>.
  210. (let-values (((buffer get) (open-bytevector-output-port)))
  211. ;; Inherit the HTTP proxying property from P.
  212. (set-http-proxy-port?! buffer (http-proxy-port? p))
  213. ;; Swallow networking errors that could occur due to connection reuse
  214. ;; and the like; they will be handled down the road when trying to
  215. ;; read responses.
  216. (false-if-networking-error
  217. (begin
  218. (for-each (cut write-request <> buffer) batch)
  219. (put-bytevector p (get))
  220. (force-output p))))
  221. ;; Now start processing responses.
  222. (let loop ((sent batch)
  223. (processed 0)
  224. (result result))
  225. (match sent
  226. (()
  227. (match (drop requests processed)
  228. (()
  229. (unless keep-alive?
  230. (close-port p))
  231. (reverse result))
  232. (remainder
  233. (connect p remainder result))))
  234. ((head tail ...)
  235. (match (false-if-networking-error (read-response p))
  236. ((? response? resp)
  237. (let* ((body (response-body-port resp))
  238. (result (proc head resp body result)))
  239. ;; The server can choose to stop responding at any time,
  240. ;; in which case we have to try again. Check whether
  241. ;; that is the case. Note that even upon "Connection:
  242. ;; close", we can read from BODY.
  243. (match (assq 'connection (response-headers resp))
  244. (('connection 'close)
  245. (close-port p)
  246. (connect #f ;try again
  247. (drop requests (+ 1 processed))
  248. result))
  249. (_
  250. (loop tail (+ 1 processed) result)))))
  251. (#f
  252. (close-port p)
  253. (connect #f ; try again
  254. (drop requests processed)
  255. result)))))))))
  256. ;;;
  257. ;;; Caching.
  258. ;;;
  259. (define %http-cache-ttl
  260. ;; Time-to-live in seconds of the HTTP cache of in ~/.cache/guix.
  261. (make-parameter
  262. (* 3600 (or (and=> (getenv "GUIX_HTTP_CACHE_TTL")
  263. string->number*)
  264. 36))))
  265. (define (cache-file-for-uri uri)
  266. "Return the name of the file in the cache corresponding to URI."
  267. (let ((digest (sha256 (string->utf8 (uri->string uri)))))
  268. ;; Use the "URL" alphabet because it does not contain "/".
  269. (string-append (cache-directory) "/http/"
  270. (base64-encode digest 0 (bytevector-length digest)
  271. #f #f base64url-alphabet))))
  272. (define* (http-fetch/cached uri #:key (ttl (%http-cache-ttl)) text?
  273. (headers '((user-agent . "GNU Guile")))
  274. (write-cache dump-port)
  275. (cache-miss (const #t))
  276. (log-port (current-error-port))
  277. (timeout 10))
  278. "Like 'http-fetch', return an input port, but cache its contents in
  279. ~/.cache/guix. The cache remains valid for TTL seconds.
  280. Call WRITE-CACHE with the HTTP input port and the cache output port to write
  281. the data to cache. Call CACHE-MISS with URI just before fetching data from
  282. URI.
  283. HEADERS is an alist of extra HTTP headers, to which cache-related headers are
  284. added automatically as appropriate.
  285. TIMEOUT specifies the timeout in seconds for connection establishment.
  286. Write information about redirects to LOG-PORT."
  287. (let* ((uri (if (string? uri)
  288. (string->uri uri)
  289. uri))
  290. (file (cache-file-for-uri uri)))
  291. (define (update-cache cache-port)
  292. (define cache-time
  293. (and cache-port
  294. (stat:mtime (stat cache-port))))
  295. (define extended-headers
  296. (if cache-time
  297. `((if-modified-since
  298. . ,(time-utc->date (make-time time-utc 0 cache-time)))
  299. ,@headers)
  300. headers))
  301. ;; Update the cache and return an input port.
  302. (guard (c ((http-get-error? c)
  303. (if (= 304 (http-get-error-code c)) ;"Not Modified"
  304. (begin
  305. (utime file) ;update FILE's mtime
  306. cache-port)
  307. (raise c))))
  308. (let ((port (http-fetch uri #:text? text?
  309. #:log-port log-port
  310. #:headers extended-headers
  311. #:timeout timeout)))
  312. (cache-miss uri)
  313. (mkdir-p (dirname file))
  314. (when cache-port
  315. (close-port cache-port))
  316. (with-atomic-file-output file
  317. (cut write-cache port <>))
  318. (close-port port)
  319. (open-input-file file))))
  320. (define (old? port)
  321. ;; Return true if PORT has passed TTL.
  322. (let* ((s (stat port))
  323. (now (current-time time-utc)))
  324. (< (+ (stat:mtime s) ttl) (time-second now))))
  325. (catch 'system-error
  326. (lambda ()
  327. (let ((port (open-input-file file)))
  328. (if (old? port)
  329. (update-cache port)
  330. port)))
  331. (lambda args
  332. (if (= ENOENT (system-error-errno args))
  333. (update-cache #f)
  334. (apply throw args))))))
  335. ;;; http-client.scm ends here