download.scm 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
  4. ;;; Copyright © 2017 Tobias Geerinckx-Rice <me@tobias.gr>
  5. ;;; Copyright © 2021 Timothy Sample <samplet@ngyro.com>
  6. ;;;
  7. ;;; This file is part of GNU Guix.
  8. ;;;
  9. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  10. ;;; under the terms of the GNU General Public License as published by
  11. ;;; the Free Software Foundation; either version 3 of the License, or (at
  12. ;;; your option) any later version.
  13. ;;;
  14. ;;; GNU Guix is distributed in the hope that it will be useful, but
  15. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. ;;; GNU General Public License for more details.
  18. ;;;
  19. ;;; You should have received a copy of the GNU General Public License
  20. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  21. (define-module (guix build download)
  22. #:use-module (web uri)
  23. #:use-module (web http)
  24. #:use-module ((web client) #:hide (open-socket-for-uri))
  25. #:use-module (web response)
  26. #:use-module (guix base64)
  27. #:use-module (guix ftp-client)
  28. #:use-module (guix build utils)
  29. #:use-module (guix progress)
  30. #:use-module (rnrs io ports)
  31. #:use-module (rnrs bytevectors)
  32. #:use-module (srfi srfi-1)
  33. #:use-module (srfi srfi-11)
  34. #:use-module (srfi srfi-19)
  35. #:use-module (srfi srfi-26)
  36. #:autoload (ice-9 ftw) (scandir)
  37. #:autoload (guix base16) (bytevector->base16-string)
  38. #:autoload (guix swh) (swh-download-directory)
  39. #:use-module (ice-9 match)
  40. #:use-module (ice-9 format)
  41. #:export (open-socket-for-uri
  42. open-connection-for-uri
  43. http-fetch
  44. %x509-certificate-directory
  45. close-connection
  46. resolve-uri-reference
  47. maybe-expand-mirrors
  48. url-fetch
  49. byte-count->string
  50. uri-abbreviation
  51. nar-uri-abbreviation
  52. store-path-abbreviation))
  53. ;;; Commentary:
  54. ;;;
  55. ;;; Fetch data such as tarballs over HTTP or FTP (builder-side code).
  56. ;;;
  57. ;;; Code:
  (define %http-receive-buffer-size
    ;; Number of bytes in the HTTP receive buffer (64 KiB).
    (* 64 1024))
  (define* (ellipsis #:optional (port (current-output-port)))
    "Return the string to use as an ellipsis when writing to PORT: Unicode's
  HORIZONTAL ELLIPSIS when PORT's encoding is UTF-8 (a rough guess at whether
  the character can be written), and three ASCII dots otherwise."
    (let ((utf8? (equal? "UTF-8" (port-encoding port))))
      (cond (utf8? "…")
            (else  "..."))))
  (define* (store-path-abbreviation store-path #:optional (prefix-length 6))
    "If STORE-PATH is the file name of a store entry, return an abbreviation of
  STORE-PATH for display, showing PREFIX-LENGTH characters of the hash.
  Otherwise return STORE-PATH.  STORE-PATH is also returned unchanged when its
  base name is shorter than 32 characters or than PREFIX-LENGTH, for instance
  when STORE-PATH is the store directory itself."
    (if (string-prefix? (%store-directory) store-path)
        (let ((base (basename store-path)))
          ;; 'string-take' and 'string-drop' raise an error when asked for more
          ;; characters than BASE has, so leave such names alone.
          (if (< (string-length base) (max 32 prefix-length))
              store-path
              (string-append (string-take base prefix-length)
                             (ellipsis)
                             ;; Drop the first 32 characters of the base name.
                             (string-drop base 32))))
        store-path))
  (define* (uri-abbreviation uri #:optional (max-length 42))
    "Return a string representing URI for display.  When the full string
  representation of URI is longer than MAX-LENGTH, try an abbreviation showing
  the scheme, host, and base name of the file; the full string is returned when
  the abbreviation would not be shorter."
    (define full
      (uri->string uri))

    (define (shortened)
      ;; Build SCHEME://HOST/<ellipsis>/BASE, additionally dropping the first
      ;; half of BASE when even that exceeds MAX-LENGTH.
      (let* ((base   (basename (uri-path uri)))
             (scheme (symbol->string (uri-scheme uri)))
             ;; `file' URIs have no host part.
             (host   (or (uri-host uri) ""))
             (head   (string-append scheme "://" host
                                    (string-append "/" (ellipsis) "/")))
             (fits?  (<= (+ (string-length head) (string-length base))
                         max-length)))
        (if fits?
            (string-append head base)
            (string-append head (ellipsis)
                           (string-drop base
                                        (quotient (string-length base) 2))))))

    (if (<= (string-length full) max-length)
        full
        (let ((candidate (shortened)))
          (if (< (string-length candidate) (string-length full))
              candidate
              full))))
  (define (nar-uri-abbreviation uri)
    "Abbreviate URI, a URI object or string assumed to designate a nar as served
  by Hydra and 'guix publish', such as
  \"http://example.org/nar/1ldrllwbna0aw5z8kpci4fsvbd2w8cw4-texlive-bin-2015\".
  Return the base name of its path stripped of the 32-character prefix and the
  dash that follows it, or the whole base name if it lacks such a prefix."
    (let* ((uri  (cond ((string? uri) (string->uri uri))
                       (else uri)))
           (name (basename (uri-path uri)))
           (hash-prefixed? (and (< 33 (string-length name))
                                (char=? #\- (string-ref name 32)))))
      (if hash-prefixed?
          (string-drop name 33)
          name)))
  (define* (ftp-fetch uri file #:key timeout print-build-trace?)
    "Fetch data from URI and write it to FILE.  Return FILE on success.  Bail
  out if the connection could not be established in less than TIMEOUT seconds.
  When PRINT-BUILD-TRACE? is true, report progress with
  'progress-reporter/trace' instead of 'progress-reporter/file'.  The FTP
  connection is closed on success as well as when the transfer fails."
    (define (open-connection)
      ;; Log in with the user name, and password if any, found in the
      ;; \"USER[:PASS]\" userinfo part of URI; otherwise use the defaults of
      ;; 'ftp-open'.
      (match (and=> (uri-userinfo uri)
                    (cut string-split <> #\:))
        (((? string? user))
         (ftp-open (uri-host uri) #:timeout timeout
                   #:username user))
        (((? string? user) (? string? pass))
         (ftp-open (uri-host uri) #:timeout timeout
                   #:username user
                   #:password pass))
        (_ (ftp-open (uri-host uri) #:timeout timeout))))

    (let ((conn (open-connection)))
      (catch #t
        (lambda ()
          (let* (;; SIZE is only passed to the progress reporter; it is #f
                 ;; when the size query fails.
                 (size (false-if-exception (ftp-size conn (uri-path uri))))
                 (in   (ftp-retr conn (basename (uri-path uri))
                                 (dirname (uri-path uri))
                                 #:timeout timeout)))
            (call-with-output-file file
              (lambda (out)
                (dump-port* in out
                            #:buffer-size %http-receive-buffer-size
                            #:reporter
                            (if print-build-trace?
                                (progress-reporter/trace
                                 file (uri->string uri) size)
                                (progress-reporter/file
                                 (uri-abbreviation uri) size)))))))
        (lambda args
          ;; Do not leak the connection when the transfer fails; then let the
          ;; original exception propagate to the caller.
          (false-if-exception (ftp-close conn))
          (apply throw args)))

      (ftp-close conn)
      (unless print-build-trace?
        (newline))
      file))
  ;; GnuTLS is loaded lazily so that this module remains usable when GnuTLS is
  ;; not available.  At compile time, this yields "possibly unbound variable"
  ;; warnings, but these are OK: we know that the variables will be bound if we
  ;; need them, because (guix download) adds GnuTLS as an input in that case.
  (define (load-gnutls)
    ;; XXX: Use this hack instead of #:autoload to avoid compilation errors.
    ;; See <http://bugs.gnu.org/12202>.
    (let ((this-module (resolve-module '(guix build download)))
          (gnutls      (resolve-interface '(gnutls))))
      (module-use! this-module gnutls))
    ;; The work is done: turn subsequent calls into no-ops.
    (set! load-gnutls (const #t)))
  (define %x509-certificate-directory
    ;; Parameter holding the name of the directory where X.509 authority PEM
    ;; certificates are stored.  Its initial value comes from the environment
    ;; when set, with a fixed fallback otherwise.
    (let ((from-environment
           (or (getenv "GUIX_TLS_CERTIFICATE_DIRECTORY")
               (getenv "SSL_CERT_DIR"))))            ;like OpenSSL
      (make-parameter (if from-environment
                          from-environment
                          "/etc/ssl/certs"))))
  (define (set-certificate-credentials-x509-trust-file!* cred file format)
    "Add to CRED the trusted certificates read from FILE, which is in FORMAT.
  This is like 'set-certificate-credentials-x509-trust-file!', but without the
  file name decoding bug described at
  <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=26948#17>."
    (define (read-everything port)
      (get-bytevector-all port))

    ;; Read FILE ourselves and hand its contents, rather than its name, to
    ;; GnuTLS.
    (let ((contents (call-with-input-file file read-everything)))
      (set-certificate-credentials-x509-trust-data! cred contents format)))
  (define (make-credendials-with-ca-trust-files directory)
    "Return certificate credentials with X.509 authority certificates read from
  DIRECTORY.  Those authority certificates are checked when
  'peer-certificate-status' is later called."
    (define (files-with-suffix suffix)
      ;; Scan DIRECTORY for file names ending in SUFFIX.  The result may be #f.
      (scandir directory
               (lambda (name) (string-suffix? suffix name))))

    (define certificate-files
      (let ((pem (files-with-suffix ".pem")))
        (if (or (not pem) (null? pem))
            ;; Some distros provide nothing but bundles (*.crt) under
            ;; /etc/ssl/certs, so look for them.
            (or (files-with-suffix ".crt") '())
            pem)))

    (define (full-name name)
      (string-append directory "/" name))

    (let ((cred (make-certificate-credentials)))
      (for-each (lambda (name)
                  (let ((file (full-name name)))
                    ;; Protect against dangling symlinks.
                    (when (file-exists? file)
                      (set-certificate-credentials-x509-trust-file!*
                       cred file
                       x509-certificate-format/pem))))
                certificate-files)
      cred))
  (define (peer-certificate session)
    "Return the certificate of the remote peer in SESSION, i.e., the first
  element of its certificate chain, imported from DER format."
    (let ((chain (session-peer-certificate-chain session)))
      (match chain
        ((leaf _ ...)
         (import-x509-certificate leaf x509-certificate-format/der)))))
  (define (assert-valid-server-certificate session server)
    "Return #t if the certificate of the remote peer for SESSION is a valid
  certificate for SERVER, where SERVER is the expected host name of peer.
  Otherwise throw to 'tls-certificate-error'."
    (let ((cert (peer-certificate session)))
      (if (x509-certificate-matches-hostname? cert server)
          ;; The host name matches; now check the certificate's validity and
          ;; reachability from the set of authority certificates loaded via
          ;; 'set-certificate-credentials-x509-trust-file!'.
          (match (peer-certificate-status session)
            (()                                  ;certificate is valid
             #t)
            ((problems ...)
             (throw 'tls-certificate-error 'invalid-certificate cert server
                    problems)))
          ;; The server's certificate does not match SERVER.
          (throw 'tls-certificate-error 'host-mismatch cert server))))
  (define (print-tls-certificate-error port key args default-printer)
    "Print to PORT the TLS certificate error represented by ARGS in an
  intelligible way.  KEY is the exception key.  When ARGS has none of the shapes
  thrown by 'assert-valid-server-certificate', call DEFAULT-PRINTER, a thunk,
  instead of failing with a match error from within the exception printer."
    (match args
      (('host-mismatch cert server)
       (format port
               "X.509 server certificate for '~a' does not match: ~a~%"
               server (x509-certificate-dn cert)))
      (('invalid-certificate cert server statuses)
       (format port
               "X.509 certificate of '~a' could not be verified:~%~{ ~a~%~}"
               server
               (map certificate-status->string statuses)))
      (_
       ;; Unknown payload: fall back to the generic message.
       (default-printer))))

  ;; Have 'print-exception' use the printer above for 'tls-certificate-error'.
  (set-exception-printer! 'tls-certificate-error
                          print-tls-certificate-error)
  (define* (tls-wrap port server #:key (verify-certificate? #t))
    "Return PORT wrapped in a TLS connection to SERVER.  SERVER must be a DNS
  host name without trailing dot.  When VERIFY-CERTIFICATE? is true, validate
  the server's certificate; if it is invalid, close PORT and throw to
  'tls-certificate-error'.  Handshake failures are thrown as 'gnutls-error'
  once they are fatal or the retry budget is exhausted."
    (define (log level str)
      ;; Logging hook for the debugging lines that are commented out below.
      (format (current-error-port)
              "gnutls: [~a|~a] ~a" (getpid) level str))

    (load-gnutls)
    (let ((session  (make-session connection-end/client))
          (ca-certs (%x509-certificate-directory)))

      ;; Some servers such as 'cloud.github.com' require the client to support
      ;; the 'SERVER NAME' extension.  However, 'set-session-server-name!' is
      ;; not available in older GnuTLS releases.  See
      ;; <http://bugs.gnu.org/18526> for details.
      (if (module-defined? (resolve-interface '(gnutls))
                           'set-session-server-name!)
          (set-session-server-name! session server-name-type/dns server)
          (format (current-error-port)
                  "warning: TLS 'SERVER NAME' extension not supported~%"))

      (set-session-transport-fd! session (fileno port))
      (set-session-default-priority! session)

      ;; The "%COMPAT" bit allows us to work around firewall issues (info
      ;; "(gnutls) Priority Strings"); see <http://bugs.gnu.org/23311>.
      ;; Explicitly disable SSLv3, which is insecure:
      ;; <https://tools.ietf.org/html/rfc7568>.
      (set-session-priorities! session "NORMAL:%COMPAT:-VERS-SSL3.0")

      (set-session-credentials! session
                                (if (and verify-certificate? ca-certs)
                                    (make-credendials-with-ca-trust-files
                                     ca-certs)
                                    (make-certificate-credentials)))

      ;; Uncomment the following lines in case of debugging emergency.
      ;;(set-log-level! 10)
      ;;(set-log-procedure! log)

      ;; Every retry, including the one that follows a warning alert, goes
      ;; through LOOP so that it runs inside the 'catch' and counts against
      ;; RETRIES.  Retrying with a bare 'handshake' call from the handler would
      ;; let any subsequent error, even a non-fatal one, escape unhandled.
      (let loop ((retries 5))
        (catch 'gnutls-error
          (lambda ()
            (handshake session))
          (lambda (key err proc . rest)
            (cond ((and (eq? err error/warning-alert-received)
                        (not (zero? retries)))
                   ;; Like Wget, do no stop upon non-fatal alerts such as
                   ;; 'alert-description/unrecognized-name'.
                   (format (current-error-port)
                           "warning: TLS warning alert received: ~a~%"
                           (alert-description->string (alert-get session)))
                   (loop (- retries 1)))
                  ((or (fatal-error? err) (zero? retries))
                   (apply throw key err proc rest))
                  (else
                   ;; We got 'error/again' or similar; try again.
                   (format (current-error-port)
                           "warning: TLS non-fatal error: ~a~%"
                           (error->string err))
                   (loop (- retries 1)))))))

      ;; Verify the server's certificate if needed.
      (when verify-certificate?
        (catch 'tls-certificate-error
          (lambda ()
            (assert-valid-server-certificate session server))
          (lambda args
            ;; The connection is unusable: release the socket before
            ;; re-throwing.
            (close-port port)
            (apply throw args))))

      (let ((record (session-record-port session)))
        (define (read! bv start count)
          ;; Read up to COUNT bytes from RECORD into BV at START; return the
          ;; number of bytes read, with 0 meaning end of file.
          (define read
            (catch 'gnutls-error
              (lambda ()
                (get-bytevector-n! record bv start count))
              (lambda (key err proc . rest)
                ;; When responding to "Connection: close" requests, some
                ;; servers close the connection abruptly after sending the
                ;; response body, without doing a proper TLS connection
                ;; termination.  Treat it as EOF.
                (if (eq? err error/premature-termination)
                    the-eof-object
                    (apply throw key err proc rest)))))

          (if (eof-object? read)
              0
              read))
        (define (write! bv start count)
          ;; Write COUNT bytes of BV from START to RECORD and flush right away.
          (put-bytevector record bv start count)
          (force-output record)
          count)
        (define (get-position)
          (port-position record))
        (define (set-position! new-position)
          (set-port-position! record new-position))
        (define (close)
          ;; Close both the socket and the TLS record port.
          (unless (port-closed? port)
            (close-port port))
          (unless (port-closed? record)
            (close-port record)))

        (define (unbuffered port)
          (setvbuf port 'none)
          port)

        (setvbuf record 'block)

        ;; Return a port that wraps RECORD to ensure that closing it also
        ;; closes PORT, the actual socket port, and its file descriptor.
        ;; Make sure it does not introduce extra buffering (custom ports
        ;; are buffered by default as of Guile 3.0.5).
        ;; XXX: This wrapper would be unnecessary if GnuTLS could
        ;; automatically close SESSION's file descriptor when RECORD is
        ;; closed, but that doesn't seem to be possible currently (as of
        ;; 3.6.9).
        (unbuffered
         (make-custom-binary-input/output-port "gnutls wrapped port" read! write!
                                               get-position set-position!
                                               close)))))
  (define (ensure-uri uri-or-string)                  ;XXX: copied from (web http)
    ;; Return URI-OR-STRING as a URI object: strings are parsed with
    ;; 'string->uri', URI objects are returned as is, and anything else is an
    ;; error.
    (match uri-or-string
      ((? string? str) (string->uri str))
      ((? uri? uri)    uri)
      (_               (error "Invalid URI" uri-or-string))))
  (define* (open-socket-for-uri uri-or-string #:key timeout)
    "Return an open input/output port for a connection to URI-OR-STRING, or to
  the current HTTP proxy when there is one.  When TIMEOUT is not #f, it must be
  a (possibly inexact) number denoting the maximum duration in seconds to wait
  for the connection to complete; past TIMEOUT, an ETIMEDOUT error is raised."
    ;; Includes a fix for <http://bugs.gnu.org/15368> which affects Guile's
    ;; 'open-socket-for-uri' up to 2.0.11 included, uses 'connect*' instead
    ;; of 'connect', and uses AI_ADDRCONFIG.
    (define http-proxy (current-http-proxy))
    (define uri (ensure-uri (or http-proxy uri-or-string)))

    (define (same-address? ai1 ai2)
      (equal? (addrinfo:addr ai1) (addrinfo:addr ai2)))

    (define addresses
      ;; Candidate addresses for URI's host, without duplicates.
      (let* ((port    (uri-port uri))
             (service (if port
                          (number->string port)
                          (symbol->string (uri-scheme uri))))
             (flags   (if (number? port)
                          (logior AI_ADDRCONFIG AI_NUMERICSERV)
                          AI_ADDRCONFIG)))
        (delete-duplicates (getaddrinfo (uri-host uri) service flags)
                           same-address?)))

    (define (make-socket ai)
      (with-fluids ((%default-port-encoding #f))
        ;; Restrict ourselves to TCP.
        (socket (addrinfo:fam ai) SOCK_STREAM IPPROTO_IP)))

    ;; Try each address in turn, returning the first socket that connects.
    (let loop ((addresses addresses))
      (let* ((ai (car addresses))
             (s  (make-socket ai)))
        (catch 'system-error
          (lambda ()
            (connect* s (addrinfo:addr ai) timeout)

            ;; Buffer input and output on this port.
            (setvbuf s 'block)
            ;; If we're using a proxy, make a note of that.
            (when http-proxy (set-http-proxy-port?! s #t))
            s)
          (lambda args
            ;; Connection failed, so try one of the other addresses; re-throw
            ;; the error once they are exhausted.
            (close s)
            (if (pair? (cdr addresses))
                (loop (cdr addresses))
                (apply throw args)))))))
  (define (setup-http-tunnel port uri)
    "Establish over PORT an HTTP tunnel to the destination server of URI: send a
  CONNECT request for URI's host and port, and return the response read from
  PORT."
    (define (port-number)
      ;; The explicit port of URI, else the default one for its scheme.
      (or (uri-port uri)
          (match (uri-scheme uri)
            ('http 80)
            ('https 443))))

    (let ((target (string-append (uri-host uri) ":"
                                 (number->string (port-number)))))
      (format port "CONNECT ~a HTTP/1.1\r\n" target)
      (format port "Host: ~a\r\n\r\n" target)
      (force-output port)
      (read-response port)))
  (define* (open-connection-for-uri uri
                                    #:key
                                    timeout
                                    (verify-certificate? #t))
    "Open a connection to URI and return the resulting port.  This is like
  'open-socket-for-uri', but HTTPS connections are handled as well.  The
  resulting port must be closed with 'close-connection'.  When
  VERIFY-CERTIFICATE? is true, verify HTTPS server certificates."
    ;; Note: Guile 2.2.0's (web client) has a same-named export that's actually
    ;; undefined.  See Guile commit 011669af3b428e5626f7bbf66b11d57d9768c047.

    (define https?
      (eq? 'https (uri-scheme uri)))

    (define https-proxy
      ;; Value of the 'https_proxy' environment variable, or #f when it is
      ;; unset or empty.
      (match (getenv "https_proxy")
        (""    #f)
        (proxy proxy)))

    (define (call-with-https-proxy thunk)
      ;; Call THUNK.  For HTTPS URIs, honor 'https_proxy', not 'http_proxy'.
      (if (and https?
               (module-variable (resolve-interface '(web client))
                                'current-http-proxy))
          (parameterize ((current-http-proxy https-proxy))
            (thunk))
          (thunk)))

    (call-with-https-proxy
     (lambda ()
       (let ((s (open-socket-for-uri uri #:timeout timeout)))
         ;; Buffer input and output on this port.
         (setvbuf s 'block %http-receive-buffer-size)

         (when (and https? https-proxy)
           (setup-http-tunnel s uri))

         (if https?
             (tls-wrap s (uri-host uri)
                       #:verify-certificate? verify-certificate?)
             s)))))
  (define (close-connection port)                     ;deprecated
    ;; Close PORT, unless it has already been closed.
    (when (not (port-closed? port))
      (close-port port)))
  ;; XXX: This is an awful hack: within (web response), replace
  ;; 'set-port-encoding!' with a no-op so that the (set-port-encoding! p
  ;; "ISO-8859-1") call in `read-response' passes, even during bootstrap where
  ;; iconv is not available.
  (let ((web-response (resolve-module '(web response))))
    (module-define! web-response 'set-port-encoding!
                    (lambda (port encoding) #f)))
  (define (resolve-uri-reference ref base)
    "Resolve the URI reference REF, interpreted relative to the BASE URI, into a
  target URI, according to the algorithm specified in RFC 3986 section 5.2.2.
  Return the resulting target URI.  An error is raised when the path contains
  '..' components that would climb above its root."
    (define (merge-paths base-path rel-path)
      ;; Replace the last component of BASE-PATH with REL-PATH.
      (let ((directory (match (string-split base-path #\/)
                         ((leading ... _) leading)
                         (() '()))))
        (string-append (string-join directory "/") "/" rel-path)))

    (define (dot-segment? component)
      (or (string=? component ".")
          (string=? component "..")))

    (define (remove-dot-segments path)
      ;; PENDING holds the components still to process; KEPT holds, in reverse
      ;; order, those retained so far.
      (let loop ((pending
                  ;; Drop leading "." and ".." components from a relative path.
                  ;; (absolute paths will start with a "" component)
                  (drop-while dot-segment? (string-split path #\/)))
                 (kept '()))
        (cond ((null? pending)
               (string-join (reverse kept) "/"))
              ((string=? (car pending) ".")
               (loop (cdr pending) kept))
              ((string=? (car pending) "..")
               (if (or (null? kept) (equal? kept '("")))
                   (error "remove-dot-segments: too many '..' components" path)
                   (loop (cdr pending) (cdr kept))))
              (else
               (loop (cdr pending) (cons (car pending) kept))))))

    (define (target scheme authority path query)
      ;; Build the target URI, taking userinfo, host, and port from AUTHORITY
      ;; (either REF or BASE) and the fragment from REF.
      (build-uri scheme
                 #:userinfo (uri-userinfo authority)
                 #:host     (uri-host authority)
                 #:port     (uri-port authority)
                 #:path     (remove-dot-segments path)
                 #:query    query
                 #:fragment (uri-fragment ref)))

    (cond ((or (uri-scheme ref)
               (uri-host ref))
           ;; REF has its own scheme or host: only the scheme may come from
           ;; BASE.
           (target (or (uri-scheme ref) (uri-scheme base))
                   ref
                   (uri-path ref)
                   (uri-query ref)))
          ((string-null? (uri-path ref))
           ;; REF has no path: reuse BASE's path, and its query unless REF has
           ;; one.
           (target (uri-scheme base)
                   base
                   (uri-path base)
                   (or (uri-query ref) (uri-query base))))
          (else
           ;; REF has a path: use it as is when absolute, merged with BASE's
           ;; otherwise.
           (target (uri-scheme base)
                   base
                   (if (string-prefix? "/" (uri-path ref))
                       (uri-path ref)
                       (merge-paths (uri-path base) (uri-path ref)))
                   (uri-query ref)))))
  (define* (http-fetch uri #:key timeout (verify-certificate? #t))
    "Return an input port containing the data at URI, and the expected number of
  bytes available or #f.  When TIMEOUT is true, bail out if the connection could
  not be established in less than TIMEOUT seconds.  When VERIFY-CERTIFICATE? is
  true, verify HTTPS certificates; otherwise simply ignore them.

  Redirections (301, 302, 303, 307, 308) are followed.  Any other status code,
  as well as a redirection lacking a 'Location' header, raises an error after
  the connection has been closed."
    (define headers
      `(;; Some web sites, such as http://dist.schmorp.de, would block you if
        ;; there's no 'User-Agent' header, presumably on the assumption that
        ;; you're a spammer.  So work around that.
        (User-Agent . "GNU Guile")

        ;; Some servers, such as https://alioth.debian.org, return "406 Not
        ;; Acceptable" when not explicitly told that everything is accepted.
        (Accept . "*/*")

        ;; Basic authentication, if needed.
        ,@(match (uri-userinfo uri)
            ((? string? str)
             `((Authorization . ,(string-append "Basic "
                                                (base64-encode
                                                 (string->utf8 str))))))
            (_ '()))))

    (let*-values (((connection)
                   (open-connection-for-uri uri
                                            #:timeout timeout
                                            #:verify-certificate?
                                            verify-certificate?))
                  ((resp port)
                   (http-get uri #:port connection #:decode-body? #f
                             #:streaming? #t
                             #:headers headers))
                  ((code)
                   (response-code resp)))
      (case code
        ((200)                                        ; OK
         (values port (response-content-length resp)))
        ((301                                         ; moved permanently
          302                                         ; found (redirection)
          303                                         ; see other
          307                                         ; temporary redirection
          308)                                        ; permanent redirection
         (let ((location (response-location resp)))
           ;; This connection is no longer needed, whatever happens next.
           (close connection)
           (unless location
             ;; Without a target, 'resolve-uri-reference' would fail with an
             ;; obscure type error; report the actual problem instead.
             (error "download failed: redirection without 'Location' header"
                    (uri->string uri) code))
           (let ((uri (resolve-uri-reference location uri)))
             (format #t "following redirection to `~a'...~%"
                     (uri->string uri))
             (http-fetch uri
                         #:timeout timeout
                         #:verify-certificate? verify-certificate?))))
        (else
         ;; Do not leak the connection: the caller gets no port to close.
         (close connection)
         (error "download failed" (uri->string uri)
                code (response-reason-phrase resp))))))
  ;; Like `false-if-exception', but print the exception on the error port:
  ;; evaluate BODY... and return its value, or print the exception and return
  ;; #f if one is thrown.
  (define-syntax false-if-exception*
    (syntax-rules ()
      ((_ body ...)
       (catch #t
         (lambda ()
           body ...)
         ;; Handler: the value returned in case of an exception.
         (lambda _
           #f)
         ;; Pre-unwind handler: report the exception.
         (lambda (key . args)
           (print-exception (current-error-port) #f key args))))))
  (define (uri-vicinity dir file)
    "Return DIR and FILE joined by exactly one slash: trailing slashes of DIR
  and leading slashes of FILE are removed first.  This is required by some HTTP
  servers."
    (let ((directory (string-trim-right dir #\/))
          (name      (string-trim file #\/)))
      (string-append directory "/" name)))
  (define (maybe-expand-mirrors uri mirrors)
    "Return a list of URIs corresponding to URI.  If URI uses the 'mirror'
  scheme, its host part names a kind of mirror, which is looked up in the
  MIRRORS alist; the result is URI's path appended to each of the URLs found.
  An error is raised if no URLs are found.  Any other URI is returned as a
  one-element list."
    (define (expand base-urls path)
      (map (lambda (base-url)
             (string->uri (uri-vicinity base-url path)))
           base-urls))

    (if (eq? 'mirror (uri-scheme uri))
        (let* ((kind      (string->symbol (uri-host uri)))
               (path      (uri-path uri))
               (base-urls (assoc-ref mirrors kind)))
          ;; Only a non-empty list of URLs is acceptable.
          (if (and (pair? base-urls) (list? base-urls))
              (expand base-urls path)
              (error "unsupported URL mirror kind" kind uri)))
        (list uri)))
  (define* (disarchive-fetch/any uris file
                                 #:key (timeout 10) (verify-certificate? #t))
    "Fetch a Disarchive specification from any of URIS, assemble it,
  and write the output to FILE.  Return #f when Disarchive cannot be loaded,
  when no specification can be fetched, or when assembly throws.  TIMEOUT and
  VERIFY-CERTIFICATE? are passed to 'http-fetch'."
    (define (fetch-specification-from uri)
      ;; Read one datum from URI; return #f, after printing the exception, if
      ;; that fails.
      (false-if-exception*
       (call-with-values
           (lambda ()
             (http-fetch uri
                         #:verify-certificate? verify-certificate?
                         #:timeout timeout))
         (lambda (port size)
           (let ((specification (read port)))
             (close-port port)
             specification)))))

    (define (fetch-specification uris)
      ;; Return the first specification that could be fetched, or #f.
      (any fetch-specification-from uris))

    (define (resolve-address address output)
      ;; Only addresses of the form (swhid "swh:1:dir:ID") are handled: the
      ;; directory is downloaded to OUTPUT with 'swh-download-directory'.
      (match address
        (('swhid swhid)
         (match (string-split swhid #\:)
           (("swh" "1" "dir" id)
            (format #t "Downloading ~a from Software Heritage...~%" file)
            (false-if-exception*
             (swh-download-directory id output)))
           (_ #f)))
        (_ #f)))

    (define (resolve addresses output)
      ;; Resolver passed to 'disarchive-assemble': try ADDRESSES in order.
      (any (lambda (address)
             (resolve-address address output))
           addresses))

    (format #t "Trying to use Disarchive to assemble ~a...~%" file)
    ;; Disarchive is looked up at run time; it may be missing.
    (let ((disarchive (resolve-module '(disarchive) #:ensure #f)))
      (if (not disarchive)
          (begin
            (format #t "could not load Disarchive~%")
            #f)
          (let ((%disarchive-log-port
                 (module-ref disarchive '%disarchive-log-port))
                (disarchive-assemble
                 (module-ref disarchive 'disarchive-assemble)))
            (match (fetch-specification uris)
              (#f
               (format #t "could not find its Disarchive specification~%")
               #f)
              (spec
               (parameterize ((%disarchive-log-port (current-output-port)))
                 (false-if-exception*
                  (disarchive-assemble spec file #:resolver resolve)))))))))
  (define* (url-fetch url file
                      #:key
                      (timeout 10) (verify-certificate? #t)
                      (mirrors '()) (content-addressed-mirrors '())
                      (disarchive-mirrors '())
                      (hashes '())
                      print-build-trace?)
    "Fetch FILE from URL; URL may be either a single string, or a list of
  strings denoting alternate URLs for FILE.  Return #f on failure, and FILE
  on success.

  When MIRRORS is defined, it must be an alist of mirrors; it is used to resolve
  'mirror://' URIs.

  HASHES must be a list of algorithm/hash pairs, where each algorithm is a
  symbol such as 'sha256 and each hash is a bytevector.

  CONTENT-ADDRESSED-MIRRORS must be a list of procedures that, given a file
  name, a hash algorithm, and a hash, return a URL where the specified data can
  be retrieved or #f; mirrors returning #f or an invalid URL are skipped.

  DISARCHIVE-MIRRORS must be a list of URL strings; a Disarchive specification
  is looked up at MIRROR followed by ALGORITHM/BASE16-HASH for each of HASHES,
  as a last resort once all the other URLs have failed.

  When VERIFY-CERTIFICATE? is true, validate HTTPS server certificates;
  otherwise simply ignore them."
    (define uri
      ;; The URIs to try, with 'mirror://' URIs expanded.
      (append-map (cut maybe-expand-mirrors <> mirrors)
                  (match url
                    ((_ ...) (map string->uri url))
                    (_       (list (string->uri url))))))

    (define (fetch uri file)
      ;; Download URI to FILE; return FILE on success and #f on failure.
      (format #t "~%Starting download of ~a~%From ~a...~%"
              file (uri->string uri))
      (case (uri-scheme uri)
        ((http https)
         (false-if-exception*
          (let-values (((port size)
                        (http-fetch uri
                                    #:verify-certificate? verify-certificate?
                                    #:timeout timeout)))
            (call-with-output-file file
              (lambda (output)
                (dump-port* port output
                            #:buffer-size %http-receive-buffer-size
                            #:reporter (if print-build-trace?
                                           (progress-reporter/trace
                                            file (uri->string uri) size)
                                           (progress-reporter/file
                                            (uri-abbreviation uri) size)))
                (newline)))
            ;; The data has been read entirely: release the port instead of
            ;; leaving it to the garbage collector.
            (close-port port)
            file)))
        ((ftp)
         (false-if-exception* (ftp-fetch uri file
                                         #:timeout timeout
                                         #:print-build-trace?
                                         print-build-trace?)))
        (else
         (format #t "skipping URI with unsupported scheme: ~s~%"
                 uri)
         #f)))

    (define content-addressed-uris
      (append-map (lambda (make-url)
                    (filter-map (match-lambda
                                  ((hash-algo . hash)
                                   (let ((file (strip-store-file-name file)))
                                     ;; MAKE-URL may return #f, which
                                     ;; 'string->uri' does not accept; let
                                     ;; 'filter-map' discard it.
                                     (and=> (make-url file hash-algo hash)
                                            string->uri))))
                                hashes))
                  content-addressed-mirrors))

    (define disarchive-uris
      (append-map (match-lambda
                    ((? string? mirror)
                     (map (match-lambda
                            ((hash-algo . hash)
                             (string->uri
                              (string-append mirror
                                             (symbol->string hash-algo) "/"
                                             (bytevector->base16-string hash)))))
                          hashes)))
                  disarchive-mirrors))

    ;; Make this unbuffered so 'progress-report/file' works as expected.  'line
    ;; means '\n', not '\r', so it's not appropriate here.
    (setvbuf (current-output-port) 'none)

    (setvbuf (current-error-port) 'line)

    ;; Try the regular URIs first, then the content-addressed mirrors.
    (let try ((uri (append uri content-addressed-uris)))
      (match uri
        ((uri tail ...)
         (or (fetch uri file)
             (try tail)))
        (()
         ;; If we are looking for a software archive, one last thing we
         ;; can try is to use Disarchive to assemble it.
         (or (disarchive-fetch/any disarchive-uris file
                                   #:verify-certificate? verify-certificate?
                                   #:timeout timeout)
             (begin
               (format (current-error-port) "failed to download ~s from ~s~%"
                       file url)
               ;; Remove FILE in case we made an incomplete download, for
               ;; example due to ENOSPC.
               (catch 'system-error
                 (lambda ()
                   (delete-file file))
                 (const #f))
               #f))))))
  710. ;;; download.scm ends here