download.scm 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
  4. ;;; Copyright © 2017 Tobias Geerinckx-Rice <me@tobias.gr>
  5. ;;; Copyright © 2021 Timothy Sample <samplet@ngyro.com>
  6. ;;;
  7. ;;; This file is part of GNU Guix.
  8. ;;;
  9. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  10. ;;; under the terms of the GNU General Public License as published by
  11. ;;; the Free Software Foundation; either version 3 of the License, or (at
  12. ;;; your option) any later version.
  13. ;;;
  14. ;;; GNU Guix is distributed in the hope that it will be useful, but
  15. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. ;;; GNU General Public License for more details.
  18. ;;;
  19. ;;; You should have received a copy of the GNU General Public License
  20. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  21. (define-module (guix build download)
  22. #:use-module (web uri)
  23. #:use-module (web http)
  24. #:use-module ((web client) #:hide (open-socket-for-uri))
  25. #:use-module (web response)
  26. #:use-module (guix base64)
  27. #:use-module (guix ftp-client)
  28. #:use-module (guix build utils)
  29. #:use-module (guix progress)
  30. #:use-module (rnrs io ports)
  31. #:use-module (rnrs bytevectors)
  32. #:use-module (srfi srfi-1)
  33. #:use-module (srfi srfi-11)
  34. #:use-module (srfi srfi-19)
  35. #:use-module (srfi srfi-26)
  36. #:autoload (ice-9 ftw) (scandir)
  37. #:autoload (guix base16) (bytevector->base16-string)
  38. #:autoload (guix swh) (swh-download-directory)
  39. #:use-module (ice-9 match)
  40. #:use-module (ice-9 format)
  41. #:export (open-socket-for-uri
  42. open-connection-for-uri
  43. http-fetch
  44. %x509-certificate-directory
  45. close-connection
  46. resolve-uri-reference
  47. maybe-expand-mirrors
  48. url-fetch
  49. byte-count->string
  50. uri-abbreviation
  51. nar-uri-abbreviation
  52. store-path-abbreviation))
  53. ;;; Commentary:
  54. ;;;
  55. ;;; Fetch data such as tarballs over HTTP or FTP (builder-side code).
  56. ;;;
  57. ;;; Code:
  (define %http-receive-buffer-size
    ;; Number of bytes in the buffer used when receiving HTTP data (64 KiB).
    (* 64 1024))
  (define* (ellipsis #:optional (port (current-output-port)))
    "Return the string to use as an ellipsis on PORT (the current output port
  by default): Unicode's HORIZONTAL ELLIPSIS when PORT's encoding is \"UTF-8\",
  which is a rough guess at whether that character can be written there, and
  three ASCII dots otherwise."
    (cond ((equal? "UTF-8" (port-encoding port)) "…")
          (else "...")))
  (define* (store-path-abbreviation store-path #:optional (prefix-length 6))
    "If STORE-PATH is the file name of a store entry, return an abbreviation of
  STORE-PATH for display: the first PREFIX-LENGTH characters of its base name
  (the beginning of the hash), an ellipsis, and whatever follows the first 32
  characters of the base name.  Return STORE-PATH unchanged when it does not
  start with the store directory, or when its base name is too short to be
  abbreviated that way (for instance when STORE-PATH is the store directory
  itself)."
    (define (abbreviable? base)
      ;; 'string-take' and 'string-drop' raise an error when asked for more
      ;; characters than BASE contains, so check its length beforehand.
      (>= (string-length base) (max 32 prefix-length)))

    (if (string-prefix? (%store-directory) store-path)
        (let ((base (basename store-path)))
          (if (abbreviable? base)
              (string-append (string-take base prefix-length)
                             (ellipsis)
                             (string-drop base 32))
              store-path))
        store-path))
  (define* (uri-abbreviation uri #:optional (max-length 42))
    "Return the string representation of URI, or, when that representation is
  longer than MAX-LENGTH characters, an abbreviation showing the scheme, the
  host, and the base name of the file.  The abbreviation is only used when it
  is actually shorter than the full string."
    (define full
      (uri->string uri))

    (define (abbreviated)
      ;; Build "SCHEME://HOST/…/FILE", further eliding the first half of FILE
      ;; when the result would still exceed MAX-LENGTH.
      (let* ((dots (ellipsis))
             (file (basename (uri-path uri)))
             (head (string-append (symbol->string (uri-scheme uri)) "://"
                                  ;; `file' URIs have no host part.
                                  (or (uri-host uri) "")
                                  "/" dots "/"))
             (tail (if (> (+ (string-length head) (string-length file))
                          max-length)
                       (string-append
                        dots
                        (string-drop file
                                     (quotient (string-length file) 2)))
                       file)))
        (string-append head tail)))

    (cond ((<= (string-length full) max-length)
           full)
          (else
           (let ((short (abbreviated)))
             (if (< (string-length short) (string-length full))
                 short
                 full)))))
  (define (nar-uri-abbreviation uri)
    "Abbreviate URI, a URI object or a string, which is assumed to designate a
  nar as served by Hydra and 'guix publish', something like
  \"http://example.org/nar/1ldrllwbna0aw5z8kpci4fsvbd2w8cw4-texlive-bin-2015\".
  Return the base name of its path without the leading 32 characters and the
  dash that follows them; when the base name does not have that shape, return
  it unchanged."
    (let* ((parsed  (if (string? uri) (string->uri uri) uri))
           (file    (basename (uri-path parsed)))
           (hashed? (and (> (string-length file) 33)
                         (char=? #\- (string-ref file 32)))))
      (if hashed?
          (string-drop file 33)
          file)))
  (define* (ftp-fetch uri file #:key timeout print-build-trace?)
    "Fetch data from URI over FTP and write it to FILE.  Return FILE on
  success.  Bail out if the connection could not be established in less than
  TIMEOUT seconds.  Progress is reported with 'progress-reporter/trace' when
  PRINT-BUILD-TRACE? is true and with 'progress-reporter/file' otherwise."
    (define host (uri-host uri))
    (define remote-file (uri-path uri))

    (define (connect)
      ;; Log in with the "USER" or "USER:PASSWORD" found in URI's userinfo
      ;; part, if any.
      (match (and=> (uri-userinfo uri)
                    (lambda (info) (string-split info #\:)))
        (((? string? user))
         (ftp-open host #:timeout timeout
                   #:username user))
        (((? string? user) (? string? pass))
         (ftp-open host #:timeout timeout
                   #:username user
                   #:password pass))
        (_
         (ftp-open host #:timeout timeout))))

    (let* ((conn (connect))
           ;; The size is only used for progress reports, so tolerate servers
           ;; that cannot provide it.
           (size (false-if-exception (ftp-size conn remote-file)))
           (in   (ftp-retr conn (basename remote-file)
                           (dirname remote-file)
                           #:timeout timeout)))
      (call-with-output-file file
        (lambda (out)
          (let ((reporter (if print-build-trace?
                              (progress-reporter/trace
                               file (uri->string uri) size)
                              (progress-reporter/file
                               (uri-abbreviation uri) size))))
            (dump-port* in out
                        #:buffer-size %http-receive-buffer-size
                        #:reporter reporter))))
      (ftp-close conn)
      (unless print-build-trace?
        (newline))
      file))
  ;; Load GnuTLS lazily so that this module can be used even when GnuTLS is
  ;; not available.  At compile time, this yields "possibly unbound
  ;; variable" warnings, but these are OK: we know that the variables will
  ;; be bound if we need them, because (guix download) adds GnuTLS as an
  ;; input in that case.
  (define (load-gnutls)
    ;; XXX: Use this hack instead of #:autoload to avoid compilation errors.
    ;; See <http://bugs.gnu.org/12202>.
    (let ((this-module (resolve-module '(guix build download)))
          (gnutls      (resolve-interface '(gnutls))))
      (module-use! this-module gnutls))
    ;; The work is done: turn subsequent calls into no-ops.
    (set! load-gnutls (const #t)))
  (define %x509-certificate-directory
    ;; Parameter holding the directory where X.509 authority PEM certificates
    ;; are stored.  Its initial value is taken from the first of these that is
    ;; set: $GUIX_TLS_CERTIFICATE_DIRECTORY, $SSL_CERT_DIR (like OpenSSL),
    ;; falling back to "/etc/ssl/certs".
    (make-parameter
     (cond ((getenv "GUIX_TLS_CERTIFICATE_DIRECTORY") => identity)
           ((getenv "SSL_CERT_DIR") => identity)
           (else "/etc/ssl/certs"))))
  (define (set-certificate-credentials-x509-trust-file!* cred file format)
    "Like 'set-certificate-credentials-x509-trust-file!', but without the file
  name decoding bug described at
  <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=26948#17>: read the contents
  of FILE ourselves and hand the resulting bytes, in FORMAT, to CRED."
    (define trust-data
      (call-with-input-file file get-bytevector-all))

    (set-certificate-credentials-x509-trust-data! cred trust-data format))
  (define (make-credendials-with-ca-trust-files directory)
    "Return certificate credentials with X.509 authority certificates read from
  DIRECTORY.  Those authority certificates are checked when
  'peer-certificate-status' is later called."
    (define (files-with-suffix suffix)
      ;; Return the names of the files in DIRECTORY that end in SUFFIX, or the
      ;; empty list when DIRECTORY cannot be scanned.
      (or (scandir directory
                   (lambda (name) (string-suffix? suffix name)))
          '()))

    (define certificate-files
      (let ((pem (files-with-suffix ".pem")))
        (if (null? pem)
            ;; Some distros provide nothing but bundles (*.crt) under
            ;; /etc/ssl/certs, so look for them.
            (files-with-suffix ".crt")
            pem)))

    (let ((cred (make-certificate-credentials)))
      (for-each (lambda (name)
                  (let ((full-name (string-append directory "/" name)))
                    ;; Protect against dangling symlinks.
                    (when (file-exists? full-name)
                      (set-certificate-credentials-x509-trust-file!*
                       cred full-name
                       x509-certificate-format/pem))))
                certificate-files)
      cred))
  (define (peer-certificate session)
    "Return the certificate of the remote peer in SESSION, that is, the first
  element of the peer's certificate chain imported from its DER encoding."
    (let ((chain (session-peer-certificate-chain session)))
      (match chain
        ((leaf _ ...)
         (import-x509-certificate leaf x509-certificate-format/der)))))
  (define (assert-valid-server-certificate session server)
    "Return #t if the certificate of the remote peer for SESSION is a valid
  certificate for SERVER, where SERVER is the expected host name of peer.
  Otherwise throw to 'tls-certificate-error', with 'host-mismatch when the
  certificate does not match SERVER and 'invalid-certificate when its status
  could not be verified."
    (let ((cert (peer-certificate session)))
      (if (x509-certificate-matches-hostname? cert server)
          ;; The host name matches; now check the certificate's validity and
          ;; reachability from the set of authority certificates loaded via
          ;; 'set-certificate-credentials-x509-trust-file!'.
          (let ((statuses (peer-certificate-status session)))
            (if (null? statuses)
                #t                                ;certificate is valid
                (throw 'tls-certificate-error 'invalid-certificate cert server
                       statuses)))
          (throw 'tls-certificate-error 'host-mismatch cert server))))
  (define (print-tls-certificate-error port key args default-printer)
    "Print to PORT the TLS certificate error represented by ARGS in an
  intelligible way.  ARGS is what was thrown to 'tls-certificate-error' after
  the key: either ('host-mismatch CERT SERVER) or ('invalid-certificate CERT
  SERVER STATUSES)."
    (match args
      (('invalid-certificate cert server statuses)
       (let ((reasons (map certificate-status->string statuses)))
         (format port
                 "X.509 certificate of '~a' could not be verified:~%~{ ~a~%~}"
                 server
                 reasons)))
      (('host-mismatch cert server)
       (format port
               "X.509 server certificate for '~a' does not match: ~a~%"
               server (x509-certificate-dn cert)))))

  ;; Have Guile use the procedure above when it reports such errors.
  (set-exception-printer! 'tls-certificate-error
                          print-tls-certificate-error)
  (define* (tls-wrap port server #:key (verify-certificate? #t))
    "Return PORT wrapped in a TLS connection to SERVER.  SERVER must be a DNS
  host name without trailing dot.  When VERIFY-CERTIFICATE? is true, check the
  server's certificate once the handshake is complete; if that check fails,
  close PORT and re-throw the 'tls-certificate-error' exception."
    (define (log level str)
      ;; Debugging aid meant for 'set-log-procedure!' (see below).
      (format (current-error-port)
              "gnutls: [~a|~a] ~a" (getpid) level str))

    (define (server-name-indication-supported?)
      (module-defined? (resolve-interface '(gnutls))
                       'set-session-server-name!))

    (define (handshake/tolerant session)
      ;; Perform the TLS handshake for SESSION, retrying once when a mere
      ;; warning alert is received.
      (catch 'gnutls-error
        (lambda ()
          (handshake session))
        (lambda (key err proc . rest)
          (if (eq? err error/warning-alert-received)
              (begin
                ;; Like Wget, do not stop upon non-fatal alerts such as
                ;; 'alert-description/unrecognized-name'.
                (format (current-error-port)
                        "warning: TLS warning alert received: ~a~%"
                        (alert-description->string (alert-get session)))
                (handshake session))
              ;; XXX: We'd use 'gnutls_error_is_fatal' but (gnutls) doesn't
              ;; provide a binding for this.
              (apply throw key err proc rest)))))

    (load-gnutls)

    (let* ((session  (make-session connection-end/client))
           (ca-certs (%x509-certificate-directory)))
      ;; Some servers such as 'cloud.github.com' require the client to support
      ;; the 'SERVER NAME' extension.  However, 'set-session-server-name!' is
      ;; not available in older GnuTLS releases.  See
      ;; <http://bugs.gnu.org/18526> for details.
      (cond ((server-name-indication-supported?)
             (set-session-server-name! session server-name-type/dns server))
            (else
             (format (current-error-port)
                     "warning: TLS 'SERVER NAME' extension not supported~%")))

      (set-session-transport-fd! session (fileno port))
      (set-session-default-priority! session)

      ;; The "%COMPAT" bit allows us to work around firewall issues (info
      ;; "(gnutls) Priority Strings"); see <http://bugs.gnu.org/23311>.
      ;; Explicitly disable SSLv3, which is insecure:
      ;; <https://tools.ietf.org/html/rfc7568>.
      (set-session-priorities! session "NORMAL:%COMPAT:-VERS-SSL3.0")

      (let ((credentials (if (and verify-certificate? ca-certs)
                             (make-credendials-with-ca-trust-files ca-certs)
                             (make-certificate-credentials))))
        (set-session-credentials! session credentials))

      ;; Uncomment the following lines in case of debugging emergency.
      ;;(set-log-level! 10)
      ;;(set-log-procedure! log)

      (handshake/tolerant session)

      ;; Verify the server's certificate if needed.
      (when verify-certificate?
        (catch 'tls-certificate-error
          (lambda ()
            (assert-valid-server-certificate session server))
          (lambda args
            (close-port port)
            (apply throw args))))

      (let ((record (session-record-port session)))
        (define (read! bv start count)
          (let ((n (catch 'gnutls-error
                     (lambda ()
                       (get-bytevector-n! record bv start count))
                     (lambda (key err proc . rest)
                       ;; When responding to "Connection: close" requests,
                       ;; some servers close the connection abruptly after
                       ;; sending the response body, without doing a proper
                       ;; TLS connection termination.  Treat it as EOF.
                       (if (eq? err error/premature-termination)
                           the-eof-object
                           (apply throw key err proc rest))))))
            ;; Custom binary ports signal end-of-file by returning zero.
            (if (eof-object? n)
                0
                n)))

        (define (write! bv start count)
          (put-bytevector record bv start count)
          (force-output record)
          count)

        (define (get-position)
          (port-position record))

        (define (set-position! new-position)
          (set-port-position! record new-position))

        (define (close)
          (unless (port-closed? port)
            (close-port port))
          (unless (port-closed? record)
            (close-port record)))

        (setvbuf record 'block)

        ;; Return a port that wraps RECORD to ensure that closing it also
        ;; closes PORT, the actual socket port, and its file descriptor.
        ;; Make sure it does not introduce extra buffering (custom ports
        ;; are buffered by default as of Guile 3.0.5).
        ;; XXX: This wrapper would be unnecessary if GnuTLS could
        ;; automatically close SESSION's file descriptor when RECORD is
        ;; closed, but that doesn't seem to be possible currently (as of
        ;; 3.6.9).
        (let ((wrapped (make-custom-binary-input/output-port
                        "gnutls wrapped port" read! write!
                        get-position set-position!
                        close)))
          (setvbuf wrapped 'none)
          wrapped))))
  (define (ensure-uri uri-or-string)              ;XXX: copied from (web http)
    "Return URI-OR-STRING as a URI object: parse it when it is a string, return
  it as is when it already is a URI, and raise an \"Invalid URI\" error for
  anything else."
    (match uri-or-string
      ((? string? str) (string->uri str))
      ((? uri? uri)    uri)
      (_               (error "Invalid URI" uri-or-string))))
  (define* (open-socket-for-uri uri-or-string #:key timeout)
    "Return an open input/output port for a connection to URI-OR-STRING, or to
  the current HTTP proxy when there is one.  When TIMEOUT is not #f, it must be
  a (possibly inexact) number denoting the maximum duration in seconds to wait
  for the connection to complete; past TIMEOUT, an ETIMEDOUT error is raised."
    ;; Includes a fix for <http://bugs.gnu.org/15368> which affects Guile's
    ;; 'open-socket-for-uri' up to 2.0.11 included, uses 'connect*' instead
    ;; of 'connect', and uses AI_ADDRCONFIG.
    (define http-proxy (current-http-proxy))
    (define uri (ensure-uri (or http-proxy uri-or-string)))
    (define port-number (uri-port uri))

    (define service
      ;; Either the explicit port number or the scheme name, as a string.
      (if port-number
          (number->string port-number)
          (symbol->string (uri-scheme uri))))

    (define hint-flags
      (if (number? port-number)
          (logior AI_ADDRCONFIG AI_NUMERICSERV)
          AI_ADDRCONFIG))

    (define (same-address? ai1 ai2)
      (equal? (addrinfo:addr ai1) (addrinfo:addr ai2)))

    (define candidates
      (delete-duplicates (getaddrinfo (uri-host uri) service hint-flags)
                         same-address?))

    (let loop ((remaining candidates))
      (let* ((ai     (car remaining))
             (others (cdr remaining))
             (sock   (with-fluids ((%default-port-encoding #f))
                       ;; Restrict ourselves to TCP.
                       (socket (addrinfo:fam ai) SOCK_STREAM IPPROTO_IP))))
        (catch 'system-error
          (lambda ()
            (connect* sock (addrinfo:addr ai) timeout)

            ;; Buffer input and output on this port.
            (setvbuf sock 'block)

            ;; If we're using a proxy, make a note of that.
            (when http-proxy
              (set-http-proxy-port?! sock #t))
            sock)
          (lambda args
            ;; Connection failed, so try one of the other addresses, and
            ;; re-throw the error once they are exhausted.
            (close sock)
            (if (null? others)
                (apply throw args)
                (loop others)))))))
  (define (setup-http-tunnel port uri)
    "Establish over PORT an HTTP tunnel to the destination server of URI: send
  a CONNECT request for URI's host and port (80 for 'http' and 443 for 'https'
  when URI has no explicit port), and return what 'read-response' reads back."
    (let* ((default-port (lambda ()
                           (match (uri-scheme uri)
                             ('http 80)
                             ('https 443))))
           (port-number  (or (uri-port uri) (default-port)))
           (target       (string-append (uri-host uri) ":"
                                        (number->string port-number))))
      (format port "CONNECT ~a HTTP/1.1\r\n" target)
      (format port "Host: ~a\r\n\r\n" target)
      (force-output port)
      (read-response port)))
  (define* (open-connection-for-uri uri
                                    #:key
                                    timeout
                                    (verify-certificate? #t))
    "Like 'open-socket-for-uri', but also handle HTTPS connections.  The
  resulting port must be closed with 'close-connection'.  When
  VERIFY-CERTIFICATE? is true, verify HTTPS server certificates."
    ;; Note: Guile 2.2.0's (web client) has a same-named export that's actually
    ;; undefined.  See Guile commit 011669af3b428e5626f7bbf66b11d57d9768c047.
    (define https?
      (eq? 'https (uri-scheme uri)))

    (define https-proxy
      ;; Value of $https_proxy, or #f when it is unset or empty.
      (let ((proxy (getenv "https_proxy")))
        (if (equal? proxy "")
            #f
            proxy)))

    (define (proxy-parameter-available?)
      (module-variable (resolve-interface '(web client))
                       'current-http-proxy))

    (define (connect)
      (let ((s (open-socket-for-uri uri #:timeout timeout)))
        ;; Buffer input and output on this port.
        (setvbuf s 'block %http-receive-buffer-size)

        (when (and https? https-proxy)
          (setup-http-tunnel s uri))

        (if https?
            (tls-wrap s (uri-host uri)
                      #:verify-certificate? verify-certificate?)
            s)))

    ;; For HTTPS URIs, honor 'https_proxy', not 'http_proxy'.
    (if (and https? (proxy-parameter-available?))
        (parameterize ((current-http-proxy https-proxy))
          (connect))
        (connect)))
  (define (close-connection port)                 ;deprecated
    "Close PORT unless it is already closed."
    (when (not (port-closed? port))
      (close-port port)))
  ;; XXX: This is an awful hack to make sure the (set-port-encoding! p
  ;; "ISO-8859-1") call in `read-response' passes, even during bootstrap
  ;; where iconv is not available: within (web response), replace
  ;; 'set-port-encoding!' with a procedure that does nothing and returns #f.
  (let ((web-response    (resolve-module '(web response)))
        (ignore-encoding (lambda (port encoding) #f)))
    (module-define! web-response 'set-port-encoding! ignore-encoding))
  (define (resolve-uri-reference ref base)
    "Resolve the URI reference REF, interpreted relative to the BASE URI, into a
  target URI, according to the algorithm specified in RFC 3986 section 5.2.2.
  Return the resulting target URI."
    (define (merge-paths base-path rel-path)
      ;; Replace the last component of BASE-PATH with REL-PATH.
      (let* ((components (string-split base-path #\/))
             (directory  (string-join (drop-right components 1) "/")))
        (string-append directory "/" rel-path)))

    (define (dot-segment? component)
      (member component '("." "..")))

    (define (remove-dot-segments path)
      (let loop ((in
                  ;; Drop leading "." and ".." components from a relative path.
                  ;; (absolute paths will start with a "" component)
                  (drop-while dot-segment? (string-split path #\/)))
                 (out '()))
        (cond ((null? in)
               (string-join (reverse out) "/"))
              ((string=? (car in) ".")
               (loop (cdr in) out))
              ((string=? (car in) "..")
               ;; Go up one level, unless we are already at the top.
               (if (or (null? out) (equal? out '("")))
                   (error "remove-dot-segments: too many '..' components" path)
                   (loop (cdr in) (cdr out))))
              (else
               (loop (cdr in) (cons (car in) out))))))

    (define (relative-to-base path query)
      ;; Return a URI with BASE's scheme and authority, REF's fragment, and
      ;; the given PATH and QUERY.
      (build-uri (uri-scheme base)
                 #:userinfo (uri-userinfo base)
                 #:host (uri-host base)
                 #:port (uri-port base)
                 #:path path
                 #:query query
                 #:fragment (uri-fragment ref)))

    (cond ((or (uri-scheme ref)
               (uri-host ref))
           ;; REF has its own scheme or authority: only its scheme may be
           ;; inherited from BASE.
           (build-uri (or (uri-scheme ref)
                          (uri-scheme base))
                      #:userinfo (uri-userinfo ref)
                      #:host (uri-host ref)
                      #:port (uri-port ref)
                      #:path (remove-dot-segments (uri-path ref))
                      #:query (uri-query ref)
                      #:fragment (uri-fragment ref)))
          ((string-null? (uri-path ref))
           ;; REF has no path: keep BASE's path, and its query unless REF
           ;; provides one.
           (relative-to-base (remove-dot-segments (uri-path base))
                             (or (uri-query ref)
                                 (uri-query base))))
          (else
           (let ((path (if (string-prefix? "/" (uri-path ref))
                           (uri-path ref)
                           (merge-paths (uri-path base)
                                        (uri-path ref)))))
             (relative-to-base (remove-dot-segments path)
                               (uri-query ref))))))
  (define* (http-fetch uri #:key timeout (verify-certificate? #t))
    "Return two values: an input port containing the data at URI, and the
  expected number of bytes available or #f.  When TIMEOUT is true, bail out if
  the connection could not be established in less than TIMEOUT seconds.  When
  VERIFY-CERTIFICATE? is true, verify HTTPS certificates; otherwise simply
  ignore them.

  Redirections (301, 302, 303, 307, and 308) are followed.  For any other
  response code than 200, and for redirections that lack a 'Location' header,
  the connection is closed and a \"download failed\" error is raised."
    (define headers
      `(;; Some web sites, such as http://dist.schmorp.de, would block you if
        ;; there's no 'User-Agent' header, presumably on the assumption that
        ;; you're a spammer.  So work around that.
        (User-Agent . "GNU Guile")

        ;; Some servers, such as https://alioth.debian.org, return "406 Not
        ;; Acceptable" when not explicitly told that everything is accepted.
        (Accept . "*/*")

        ;; Basic authentication, if needed.
        ,@(match (uri-userinfo uri)
            ((? string? str)
             `((Authorization . ,(string-append "Basic "
                                                (base64-encode
                                                 (string->utf8 str))))))
            (_ '()))))

    (let*-values (((connection)
                   (open-connection-for-uri uri
                                            #:timeout timeout
                                            #:verify-certificate?
                                            verify-certificate?))
                  ((resp port)
                   (http-get uri #:port connection #:decode-body? #f
                             #:streaming? #t
                             #:headers headers))
                  ((code)
                   (response-code resp)))
      (if (= code 200)                            ; OK
          (values port (response-content-length resp))
          (begin
            ;; In all the remaining cases this connection is no longer needed:
            ;; the response headers have already been read into RESP.  Close
            ;; it now so that it is not leaked, whether we follow a
            ;; redirection or raise an error.
            (close-port connection)
            (case code
              ((301                               ; moved permanently
                302                               ; found (redirection)
                303                               ; see other
                307                               ; temporary redirection
                308)                              ; permanent redirection
               (match (response-location resp)
                 (#f
                  ;; Without this check, 'resolve-uri-reference' would fail
                  ;; with an unrelated type error.
                  (error "download failed" (uri->string uri)
                         code "redirection without a 'Location' header"))
                 (location
                  (let ((target (resolve-uri-reference location uri)))
                    (format #t "following redirection to `~a'...~%"
                            (uri->string target))
                    (http-fetch target
                                #:timeout timeout
                                #:verify-certificate? verify-certificate?)))))
              (else
               (error "download failed" (uri->string uri)
                      code (response-reason-phrase resp))))))))
  (define-syntax false-if-exception*
    (syntax-rules ()
      "Like `false-if-exception', but print the exception on the error port:
  evaluate BODY and return its value, or print the exception that BODY raised
  and return #f."
      ((_ body ...)
       (catch #t
         (lambda ()
           body ...)
         ;; Handler: the result in case of an exception.
         (lambda _
           #f)
         ;; Pre-unwind handler: report the exception.
         (lambda (key . args)
           (print-exception (current-error-port) #f key args))))))
  (define (uri-vicinity dir file)
    "Concatenate DIR, slash, and FILE, keeping only one slash in between:
  trailing slashes of DIR and leading slashes of FILE are removed first.
  This is required by some HTTP servers."
    (let ((directory (string-trim-right dir #\/))
          (relative  (string-trim file #\/)))
      (string-join (list directory relative) "/")))
  (define (maybe-expand-mirrors uri mirrors)
    "If URI uses the 'mirror' scheme, expand it according to the MIRRORS alist,
  whose keys are symbols named after the URI's host part, and raise an error
  when MIRRORS has no non-empty list of mirrors for it.  Return a list of
  URIs; for any other scheme, that list only contains URI."
    (if (eq? 'mirror (uri-scheme uri))
        (let ((kind (string->symbol (uri-host uri)))
              (path (uri-path uri)))
          (match (assoc-ref mirrors kind)
            ((base-urls ..1)
             (map (lambda (base-url)
                    (string->uri (uri-vicinity base-url path)))
                  base-urls))
            (_
             (error "unsupported URL mirror kind" kind uri))))
        (list uri)))
  (define* (disarchive-fetch/any uris file
                                 #:key (timeout 10) (verify-certificate? #t))
    "Fetch a Disarchive specification from any of URIS, assemble it,
  and write the output to FILE.  Return #f when Disarchive cannot be loaded,
  when no specification could be fetched, or when assembling it raised an
  exception.  TIMEOUT and VERIFY-CERTIFICATE? are passed to 'http-fetch'."
    (define (fetch-specification uris)
      ;; Return the first specification that could be read from one of URIS,
      ;; or #f.
      (any (lambda (uri)
             (false-if-exception*
              (call-with-values
                  (lambda ()
                    (http-fetch uri
                                #:verify-certificate? verify-certificate?
                                #:timeout timeout))
                (lambda (port size)
                  (let ((specification (read port)))
                    (close-port port)
                    specification)))))
           uris))

    (define (resolve addresses output)
      ;; Resolver passed to Disarchive: try each of ADDRESSES in turn; only
      ;; Software Heritage directory identifiers ("swh:1:dir:ID") are
      ;; handled, by downloading the directory to OUTPUT.
      (any (match-lambda
             (('swhid identifier)
              (match (string-split identifier #\:)
                (("swh" "1" "dir" id)
                 (format #t "Downloading ~a from Software Heritage...~%" file)
                 (false-if-exception*
                  (swh-download-directory id output)))
                (_ #f)))
             (_ #f))
           addresses))

    (format #t "Trying to use Disarchive to assemble ~a...~%" file)
    (let ((disarchive (resolve-module '(disarchive) #:ensure #f)))
      (if disarchive
          (let ((%disarchive-log-port
                 (module-ref disarchive '%disarchive-log-port))
                (disarchive-assemble
                 (module-ref disarchive 'disarchive-assemble)))
            (let ((spec (fetch-specification uris)))
              (if spec
                  (parameterize ((%disarchive-log-port (current-output-port)))
                    (false-if-exception*
                     (disarchive-assemble spec file #:resolver resolve)))
                  (begin
                    (format #t "could not find its Disarchive specification~%")
                    #f))))
          (begin
            (format #t "could not load Disarchive~%")
            #f))))
  (define* (url-fetch url file
                      #:key
                      (timeout 10) (verify-certificate? #t)
                      (mirrors '()) (content-addressed-mirrors '())
                      (disarchive-mirrors '())
                      (hashes '())
                      print-build-trace?)
    "Fetch FILE from URL; URL may be either a single string, or a list of
  strings denoting alternate URLs for FILE.  Return #f on failure, and FILE
  on success.

  When MIRRORS is defined, it must be an alist of mirrors; it is used to resolve
  'mirror://' URIs.

  HASHES must be a list of algorithm/hash pairs, where each algorithm is a
  symbol such as 'sha256 and each hash is a bytevector.
  CONTENT-ADDRESSED-MIRRORS must be a list of procedures that, given a file
  name, a hash algorithm, and a hash, return a URL where the specified data can
  be retrieved or #f.

  DISARCHIVE-MIRRORS must be a list of strings; for each of HASHES, the
  algorithm name, a slash, and the base16 representation of the hash are
  appended to each string to form the URL of a Disarchive specification.
  Those are tried last, once all the other URLs have failed.

  TIMEOUT is the connection timeout, in seconds, passed to the HTTP and FTP
  fetchers.  When PRINT-BUILD-TRACE? is true, progress is reported with
  'progress-reporter/trace' rather than 'progress-reporter/file'.

  When VERIFY-CERTIFICATE? is true, validate HTTPS server certificates;
  otherwise simply ignore them."
    (define uri
      ;; The list of URIs to try, with 'mirror://' URIs expanded.
      (append-map (cut maybe-expand-mirrors <> mirrors)
                  (match url
                    ((_ ...) (map string->uri url))
                    (_       (list (string->uri url))))))

    (define (fetch uri file)
      ;; Download URI to FILE.  Return FILE on success and #f on failure.
      (format #t "~%Starting download of ~a~%From ~a...~%"
              file (uri->string uri))
      (case (uri-scheme uri)
        ((http https)
         (false-if-exception*
          (let-values (((port size)
                        (http-fetch uri
                                    #:verify-certificate? verify-certificate?
                                    #:timeout timeout)))
            (call-with-output-file file
              (lambda (output)
                (dump-port* port output
                            #:buffer-size %http-receive-buffer-size
                            #:reporter (if print-build-trace?
                                           (progress-reporter/trace
                                            file (uri->string uri) size)
                                           (progress-reporter/file
                                            (uri-abbreviation uri) size)))
                (newline)))
            ;; We are done with the response body; do not leak its port.
            (close-port port)
            file)))
        ((ftp)
         (false-if-exception* (ftp-fetch uri file
                                         #:timeout timeout
                                         #:print-build-trace?
                                         print-build-trace?)))
        (else
         (format #t "skipping URI with unsupported scheme: ~s~%"
                 uri)
         #f)))

    (define content-addressed-uris
      (append-map (lambda (make-url)
                    (filter-map (match-lambda
                                  ((hash-algo . hash)
                                   (let ((file (strip-store-file-name file)))
                                     ;; MAKE-URL may return #f, in which case
                                     ;; there is nothing to parse and the
                                     ;; entry must simply be skipped.
                                     (and=> (make-url file hash-algo hash)
                                            string->uri))))
                                hashes))
                  content-addressed-mirrors))

    (define disarchive-uris
      (append-map (match-lambda
                    ((? string? mirror)
                     (map (match-lambda
                            ((hash-algo . hash)
                             (string->uri
                              (string-append mirror
                                             (symbol->string hash-algo) "/"
                                             (bytevector->base16-string hash)))))
                          hashes)))
                  disarchive-mirrors))

    ;; Make this unbuffered so 'progress-report/file' works as expected.  'line
    ;; means '\n', not '\r', so it's not appropriate here.
    (setvbuf (current-output-port) 'none)

    (setvbuf (current-error-port) 'line)

    (let try ((uri (append uri content-addressed-uris)))
      (match uri
        ((uri tail ...)
         (or (fetch uri file)
             (try tail)))
        (()
         ;; If we are looking for a software archive, one last thing we
         ;; can try is to use Disarchive to assemble it.
         (or (disarchive-fetch/any disarchive-uris file
                                   #:verify-certificate? verify-certificate?
                                   #:timeout timeout)
             (begin
               (format (current-error-port) "failed to download ~s from ~s~%"
                       file url)
               ;; Remove FILE in case we made an incomplete download, for
               ;; example due to ENOSPC.
               (catch 'system-error
                 (lambda ()
                   (delete-file file))
                 (const #f))
               #f))))))
  704. ;;; download.scm ends here