download.scm 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2012-2022 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
  4. ;;; Copyright © 2017 Tobias Geerinckx-Rice <me@tobias.gr>
  5. ;;; Copyright © 2021 Timothy Sample <samplet@ngyro.com>
  6. ;;;
  7. ;;; This file is part of GNU Guix.
  8. ;;;
  9. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  10. ;;; under the terms of the GNU General Public License as published by
  11. ;;; the Free Software Foundation; either version 3 of the License, or (at
  12. ;;; your option) any later version.
  13. ;;;
  14. ;;; GNU Guix is distributed in the hope that it will be useful, but
  15. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. ;;; GNU General Public License for more details.
  18. ;;;
  19. ;;; You should have received a copy of the GNU General Public License
  20. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  21. (define-module (guix build download)
  22. #:use-module (web uri)
  23. #:use-module (web http)
  24. #:use-module ((web client) #:hide (open-socket-for-uri))
  25. #:use-module (web response)
  26. #:use-module (guix base64)
  27. #:use-module (guix ftp-client)
  28. #:use-module (guix build utils)
  29. #:use-module (guix progress)
  30. #:use-module (guix memoization)
  31. #:use-module (rnrs io ports)
  32. #:use-module (rnrs bytevectors)
  33. #:use-module (srfi srfi-1)
  34. #:use-module (srfi srfi-11)
  35. #:use-module (srfi srfi-19)
  36. #:use-module (srfi srfi-26)
  37. #:autoload (ice-9 ftw) (scandir)
  38. #:autoload (guix base16) (bytevector->base16-string)
  39. #:autoload (guix swh) (swh-download-directory %verify-swh-certificate?)
  40. #:use-module (ice-9 match)
  41. #:use-module (ice-9 format)
  42. #:export (open-socket-for-uri
  43. open-connection-for-uri
  44. http-fetch
  45. %x509-certificate-directory
  46. close-connection
  47. resolve-uri-reference
  48. maybe-expand-mirrors
  49. url-fetch
  50. byte-count->string
  51. uri-abbreviation
  52. nar-uri-abbreviation
  53. store-path-abbreviation))
  54. ;;; Commentary:
  55. ;;;
  56. ;;; Fetch data such as tarballs over HTTP or FTP (builder-side code).
  57. ;;;
  58. ;;; Code:
  59. (define %http-receive-buffer-size
  60. ;; Size of the HTTP receive buffer.
  61. 65536)
  62. (define* (ellipsis #:optional (port (current-output-port)))
  63. "Make a rough guess at whether Unicode's HORIZONTAL ELLIPSIS can be written
  64. in PORT's encoding, and return either that or ASCII dots."
  65. (if (equal? (port-encoding port) "UTF-8")
  66. "…"
  67. "..."))
  68. (define* (store-path-abbreviation store-path #:optional (prefix-length 6))
  69. "If STORE-PATH is the file name of a store entry, return an abbreviation of
  70. STORE-PATH for display, showing PREFIX-LENGTH characters of the hash.
  71. Otherwise return STORE-PATH."
  72. (if (string-prefix? (%store-directory) store-path)
  73. (let ((base (basename store-path)))
  74. (string-append (string-take base prefix-length)
  75. (ellipsis)
  76. (string-drop base 32)))
  77. store-path))
  78. (define* (uri-abbreviation uri #:optional (max-length 42))
  79. "If URI's string representation is larger than MAX-LENGTH, return an
  80. abbreviation of URI showing the scheme, host, and basename of the file."
  81. (define uri-as-string
  82. (uri->string uri))
  83. (define (elide-path)
  84. (let* ((path (uri-path uri))
  85. (base (basename path))
  86. (prefix (string-append (symbol->string (uri-scheme uri)) "://"
  87. ;; `file' URIs have no host part.
  88. (or (uri-host uri) "")
  89. (string-append "/" (ellipsis) "/"))))
  90. (if (> (+ (string-length prefix) (string-length base)) max-length)
  91. (string-append prefix (ellipsis)
  92. (string-drop base (quotient (string-length base) 2)))
  93. (string-append prefix base))))
  94. (if (> (string-length uri-as-string) max-length)
  95. (let ((short (elide-path)))
  96. (if (< (string-length short) (string-length uri-as-string))
  97. short
  98. uri-as-string))
  99. uri-as-string))
  100. (define (nar-uri-abbreviation uri)
  101. "Abbreviate URI, which is assumed to be the URI of a nar as served by Hydra
  102. and 'guix publish', something like
  103. \"http://example.org/nar/1ldrllwbna0aw5z8kpci4fsvbd2w8cw4-texlive-bin-2015\"."
  104. (let* ((uri (if (string? uri) (string->uri uri) uri))
  105. (path (basename (uri-path uri))))
  106. (if (and (> (string-length path) 33)
  107. (char=? (string-ref path 32) #\-))
  108. (string-drop path 33)
  109. path)))
  110. (define* (ftp-fetch uri file #:key timeout print-build-trace?)
  111. "Fetch data from URI and write it to FILE. Return FILE on success. Bail
  112. out if the connection could not be established in less than TIMEOUT seconds."
  113. (let* ((conn (match (and=> (uri-userinfo uri)
  114. (cut string-split <> #\:))
  115. (((? string? user))
  116. (ftp-open (uri-host uri) #:timeout timeout
  117. #:username user))
  118. (((? string? user) (? string? pass))
  119. (ftp-open (uri-host uri) #:timeout timeout
  120. #:username user
  121. #:password pass))
  122. (_ (ftp-open (uri-host uri) #:timeout timeout))))
  123. (size (false-if-exception (ftp-size conn (uri-path uri))))
  124. (in (ftp-retr conn (basename (uri-path uri))
  125. (dirname (uri-path uri))
  126. #:timeout timeout)))
  127. (call-with-output-file file
  128. (lambda (out)
  129. (dump-port* in out
  130. #:buffer-size %http-receive-buffer-size
  131. #:reporter
  132. (if print-build-trace?
  133. (progress-reporter/trace
  134. file (uri->string uri) size)
  135. (progress-reporter/file
  136. (uri-abbreviation uri) size)))))
  137. (ftp-close conn)
  138. (unless print-build-trace?
  139. (newline))
  140. file))
  141. ;; Autoload GnuTLS so that this module can be used even when GnuTLS is
  142. ;; not available. At compile time, this yields "possibly unbound
  143. ;; variable" warnings, but these are OK: we know that the variables will
  144. ;; be bound if we need them, because (guix download) adds GnuTLS as an
  145. ;; input in that case.
  146. (define (load-gnutls)
  147. ;; XXX: Use this hack instead of #:autoload to avoid compilation errors.
  148. ;; See <http://bugs.gnu.org/12202>.
  149. (module-use! (resolve-module '(guix build download))
  150. (resolve-interface '(gnutls)))
  151. (set! load-gnutls (const #t)))
  152. (define %x509-certificate-directory
  153. ;; The directory where X.509 authority PEM certificates are stored.
  154. (make-parameter (or (getenv "GUIX_TLS_CERTIFICATE_DIRECTORY")
  155. (getenv "SSL_CERT_DIR") ;like OpenSSL
  156. "/etc/ssl/certs")))
  157. (define (set-certificate-credentials-x509-trust-file!* cred file format)
  158. "Like 'set-certificate-credentials-x509-trust-file!', but without the file
  159. name decoding bug described at
  160. <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=26948#17>."
  161. (let ((data (call-with-input-file file get-bytevector-all)))
  162. (set-certificate-credentials-x509-trust-data! cred data format)))
  163. (define make-credentials-with-ca-trust-files
  164. (mlambda (directory)
  165. "Return certificate credentials with X.509 authority certificates read from
  166. DIRECTORY. Those authority certificates are checked when
  167. 'peer-certificate-status' is later called."
  168. ;; Memoize the result to avoid scanning all the certificates every time a
  169. ;; connection is made.
  170. (let ((cred (make-certificate-credentials))
  171. (files (match (scandir directory (cut string-suffix? ".pem" <>))
  172. ((or #f ())
  173. ;; Some distros provide nothing but bundles (*.crt) under
  174. ;; /etc/ssl/certs, so look for them.
  175. (or (scandir directory (cut string-suffix? ".crt" <>))
  176. '()))
  177. (pem pem))))
  178. (for-each (lambda (file)
  179. (let ((file (string-append directory "/" file)))
  180. ;; Protect against dangling symlinks.
  181. (when (file-exists? file)
  182. (set-certificate-credentials-x509-trust-file!*
  183. cred file
  184. x509-certificate-format/pem))))
  185. files)
  186. cred)))
  187. (define (peer-certificate session)
  188. "Return the certificate of the remote peer in SESSION."
  189. (match (session-peer-certificate-chain session)
  190. ((first _ ...)
  191. (import-x509-certificate first x509-certificate-format/der))))
  192. (define (assert-valid-server-certificate session server)
  193. "Return #t if the certificate of the remote peer for SESSION is a valid
  194. certificate for SERVER, where SERVER is the expected host name of peer."
  195. (define cert
  196. (peer-certificate session))
  197. ;; First check whether the server's certificate matches SERVER.
  198. (unless (x509-certificate-matches-hostname? cert server)
  199. (throw 'tls-certificate-error 'host-mismatch cert server))
  200. ;; Second check its validity and reachability from the set of authority
  201. ;; certificates loaded via 'set-certificate-credentials-x509-trust-file!'.
  202. (match (peer-certificate-status session)
  203. (() ;certificate is valid
  204. #t)
  205. ((statuses ...)
  206. (throw 'tls-certificate-error 'invalid-certificate cert server
  207. statuses))))
  208. (define (print-tls-certificate-error port key args default-printer)
  209. "Print the TLS certificate error represented by ARGS in an intelligible
  210. way."
  211. (match args
  212. (('host-mismatch cert server)
  213. (format port
  214. "X.509 server certificate for '~a' does not match: ~a~%"
  215. server (x509-certificate-dn cert)))
  216. (('invalid-certificate cert server statuses)
  217. (format port
  218. "X.509 certificate of '~a' could not be verified:~%~{ ~a~%~}"
  219. server
  220. (map certificate-status->string statuses)))))
  221. (set-exception-printer! 'tls-certificate-error
  222. print-tls-certificate-error)
  223. (define* (tls-wrap port server #:key (verify-certificate? #t))
  224. "Return PORT wrapped in a TLS connection to SERVER. SERVER must be a DNS
  225. host name without trailing dot."
  226. (define (log level str)
  227. (format (current-error-port)
  228. "gnutls: [~a|~a] ~a" (getpid) level str))
  229. (load-gnutls)
  230. (let ((session (make-session connection-end/client))
  231. (ca-certs (%x509-certificate-directory)))
  232. ;; Some servers such as 'cloud.github.com' require the client to support
  233. ;; the 'SERVER NAME' extension. However, 'set-session-server-name!' is
  234. ;; not available in older GnuTLS releases. See
  235. ;; <http://bugs.gnu.org/18526> for details.
  236. (if (module-defined? (resolve-interface '(gnutls))
  237. 'set-session-server-name!)
  238. (set-session-server-name! session server-name-type/dns server)
  239. (format (current-error-port)
  240. "warning: TLS 'SERVER NAME' extension not supported~%"))
  241. (set-session-transport-fd! session (fileno port))
  242. (set-session-default-priority! session)
  243. ;; The "%COMPAT" bit allows us to work around firewall issues (info
  244. ;; "(gnutls) Priority Strings"); see <http://bugs.gnu.org/23311>.
  245. ;; Explicitly disable SSLv3, which is insecure:
  246. ;; <https://tools.ietf.org/html/rfc7568>.
  247. (set-session-priorities! session "NORMAL:%COMPAT:-VERS-SSL3.0")
  248. (set-session-credentials! session
  249. (if (and verify-certificate? ca-certs)
  250. (make-credentials-with-ca-trust-files
  251. ca-certs)
  252. (make-certificate-credentials)))
  253. ;; Uncomment the following lines in case of debugging emergency.
  254. ;;(set-log-level! 10)
  255. ;;(set-log-procedure! log)
  256. (let loop ((retries 5))
  257. (catch 'gnutls-error
  258. (lambda ()
  259. (handshake session))
  260. (lambda (key err proc . rest)
  261. (cond ((eq? err error/warning-alert-received)
  262. ;; Like Wget, do no stop upon non-fatal alerts such as
  263. ;; 'alert-description/unrecognized-name'.
  264. (format (current-error-port)
  265. "warning: TLS warning alert received: ~a~%"
  266. (alert-description->string (alert-get session)))
  267. (handshake session))
  268. (else
  269. (if (or (fatal-error? err) (zero? retries))
  270. (apply throw key err proc rest)
  271. (begin
  272. ;; We got 'error/again' or similar; try again.
  273. (format (current-error-port)
  274. "warning: TLS non-fatal error: ~a~%"
  275. (error->string err))
  276. (loop (- retries 1)))))))))
  277. ;; Verify the server's certificate if needed.
  278. (when verify-certificate?
  279. (catch 'tls-certificate-error
  280. (lambda ()
  281. (assert-valid-server-certificate session server))
  282. (lambda args
  283. (close-port port)
  284. (apply throw args))))
  285. (let ((record (session-record-port session)))
  286. (define (read! bv start count)
  287. (define read
  288. (catch 'gnutls-error
  289. (lambda ()
  290. (get-bytevector-n! record bv start count))
  291. (lambda (key err proc . rest)
  292. ;; When responding to "Connection: close" requests, some
  293. ;; servers close the connection abruptly after sending the
  294. ;; response body, without doing a proper TLS connection
  295. ;; termination. Treat it as EOF.
  296. (if (eq? err error/premature-termination)
  297. the-eof-object
  298. (apply throw key err proc rest)))))
  299. (if (eof-object? read)
  300. 0
  301. read))
  302. (define (write! bv start count)
  303. (put-bytevector record bv start count)
  304. (force-output record)
  305. count)
  306. (define (get-position)
  307. (port-position record))
  308. (define (set-position! new-position)
  309. (set-port-position! record new-position))
  310. (define (close)
  311. (unless (port-closed? port)
  312. (close-port port))
  313. (unless (port-closed? record)
  314. (close-port record)))
  315. (define (unbuffered port)
  316. (setvbuf port 'none)
  317. port)
  318. (setvbuf record 'block)
  319. ;; Return a port that wraps RECORD to ensure that closing it also
  320. ;; closes PORT, the actual socket port, and its file descriptor.
  321. ;; Make sure it does not introduce extra buffering (custom ports
  322. ;; are buffered by default as of Guile 3.0.5).
  323. ;; XXX: This wrapper would be unnecessary if GnuTLS could
  324. ;; automatically close SESSION's file descriptor when RECORD is
  325. ;; closed, but that doesn't seem to be possible currently (as of
  326. ;; 3.6.9).
  327. (unbuffered
  328. (make-custom-binary-input/output-port "gnutls wrapped port" read! write!
  329. get-position set-position!
  330. close)))))
  331. (define (ensure-uri uri-or-string) ;XXX: copied from (web http)
  332. (cond
  333. ((string? uri-or-string) (string->uri uri-or-string))
  334. ((uri? uri-or-string) uri-or-string)
  335. (else (error "Invalid URI" uri-or-string))))
  336. (define* (open-socket-for-uri uri-or-string #:key timeout)
  337. "Return an open input/output port for a connection to URI. When TIMEOUT is
  338. not #f, it must be a (possibly inexact) number denoting the maximum duration
  339. in seconds to wait for the connection to complete; passed TIMEOUT, an
  340. ETIMEDOUT error is raised."
  341. ;; Includes a fix for <http://bugs.gnu.org/15368> which affects Guile's
  342. ;; 'open-socket-for-uri' up to 2.0.11 included, uses 'connect*' instead
  343. ;; of 'connect', and uses AI_ADDRCONFIG.
  344. (define http-proxy (current-http-proxy))
  345. (define uri (ensure-uri (or http-proxy uri-or-string)))
  346. (define addresses
  347. (let ((port (uri-port uri)))
  348. (delete-duplicates
  349. (getaddrinfo (uri-host uri)
  350. (cond (port => number->string)
  351. (else (symbol->string (uri-scheme uri))))
  352. (if (number? port)
  353. (logior AI_ADDRCONFIG AI_NUMERICSERV)
  354. AI_ADDRCONFIG))
  355. (lambda (ai1 ai2)
  356. (equal? (addrinfo:addr ai1) (addrinfo:addr ai2))))))
  357. (let loop ((addresses addresses))
  358. (let* ((ai (car addresses))
  359. (s (with-fluids ((%default-port-encoding #f))
  360. ;; Restrict ourselves to TCP.
  361. (socket (addrinfo:fam ai) SOCK_STREAM IPPROTO_IP))))
  362. (catch 'system-error
  363. (lambda ()
  364. (connect* s (addrinfo:addr ai) timeout)
  365. ;; Buffer input and output on this port.
  366. (setvbuf s 'block)
  367. ;; If we're using a proxy, make a note of that.
  368. (when http-proxy (set-http-proxy-port?! s #t))
  369. s)
  370. (lambda args
  371. ;; Connection failed, so try one of the other addresses.
  372. (close s)
  373. (if (null? (cdr addresses))
  374. (apply throw args)
  375. (loop (cdr addresses))))))))
  376. (define (setup-http-tunnel port uri)
  377. "Establish over PORT an HTTP tunnel to the destination server of URI."
  378. (define target
  379. (string-append (uri-host uri) ":"
  380. (number->string
  381. (or (uri-port uri)
  382. (match (uri-scheme uri)
  383. ('http 80)
  384. ('https 443))))))
  385. (format port "CONNECT ~a HTTP/1.1\r\n" target)
  386. (format port "Host: ~a\r\n\r\n" target)
  387. (force-output port)
  388. (read-response port))
  389. (define* (open-connection-for-uri uri
  390. #:key
  391. timeout
  392. (verify-certificate? #t))
  393. "Like 'open-socket-for-uri', but also handle HTTPS connections. When
  394. VERIFY-CERTIFICATE? is true, verify HTTPS server certificates."
  395. ;; Note: Guile 2.2.0's (web client) has a same-named export that's actually
  396. ;; undefined. See Guile commit 011669af3b428e5626f7bbf66b11d57d9768c047.
  397. (define https?
  398. (eq? 'https (uri-scheme uri)))
  399. (define https-proxy (let ((proxy (getenv "https_proxy")))
  400. (and (not (equal? proxy ""))
  401. proxy)))
  402. (let-syntax ((with-https-proxy
  403. (syntax-rules ()
  404. ((_ exp)
  405. ;; For HTTPS URIs, honor 'https_proxy', not 'http_proxy'.
  406. (let ((thunk (lambda () exp)))
  407. (if (and https?
  408. (module-variable
  409. (resolve-interface '(web client))
  410. 'current-http-proxy))
  411. (parameterize ((current-http-proxy https-proxy))
  412. (thunk))
  413. (thunk)))))))
  414. (with-https-proxy
  415. (let ((s (open-socket-for-uri uri #:timeout timeout)))
  416. ;; Buffer input and output on this port.
  417. (setvbuf s 'block %http-receive-buffer-size)
  418. (when (and https? https-proxy)
  419. (setup-http-tunnel s uri))
  420. (if https?
  421. (tls-wrap s (uri-host uri)
  422. #:verify-certificate? verify-certificate?)
  423. s)))))
  424. (define (close-connection port) ;deprecated
  425. (unless (port-closed? port)
  426. (close-port port)))
  427. ;; XXX: This is an awful hack to make sure the (set-port-encoding! p
  428. ;; "ISO-8859-1") call in `read-response' passes, even during bootstrap
  429. ;; where iconv is not available.
  430. (module-define! (resolve-module '(web response))
  431. 'set-port-encoding!
  432. (lambda (p e) #f))
  433. (define (resolve-uri-reference ref base)
  434. "Resolve the URI reference REF, interpreted relative to the BASE URI, into a
  435. target URI, according to the algorithm specified in RFC 3986 section 5.2.2.
  436. Return the resulting target URI."
  437. (define (merge-paths base-path rel-path)
  438. (let* ((base-components (string-split base-path #\/))
  439. (base-directory-components (match base-components
  440. ((components ... last) components)
  441. (() '())))
  442. (base-directory (string-join base-directory-components "/")))
  443. (string-append base-directory "/" rel-path)))
  444. (define (remove-dot-segments path)
  445. (let loop ((in
  446. ;; Drop leading "." and ".." components from a relative path.
  447. ;; (absolute paths will start with a "" component)
  448. (drop-while (match-lambda
  449. ((or "." "..") #t)
  450. (_ #f))
  451. (string-split path #\/)))
  452. (out '()))
  453. (match in
  454. (("." . rest)
  455. (loop rest out))
  456. ((".." . rest)
  457. (match out
  458. ((or () (""))
  459. (error "remove-dot-segments: too many '..' components" path))
  460. (_
  461. (loop rest (cdr out)))))
  462. ((component . rest)
  463. (loop rest (cons component out)))
  464. (()
  465. (string-join (reverse out) "/")))))
  466. (cond ((or (uri-scheme ref)
  467. (uri-host ref))
  468. (build-uri (or (uri-scheme ref)
  469. (uri-scheme base))
  470. #:userinfo (uri-userinfo ref)
  471. #:host (uri-host ref)
  472. #:port (uri-port ref)
  473. #:path (remove-dot-segments (uri-path ref))
  474. #:query (uri-query ref)
  475. #:fragment (uri-fragment ref)))
  476. ((string-null? (uri-path ref))
  477. (build-uri (uri-scheme base)
  478. #:userinfo (uri-userinfo base)
  479. #:host (uri-host base)
  480. #:port (uri-port base)
  481. #:path (remove-dot-segments (uri-path base))
  482. #:query (or (uri-query ref)
  483. (uri-query base))
  484. #:fragment (uri-fragment ref)))
  485. (else
  486. (build-uri (uri-scheme base)
  487. #:userinfo (uri-userinfo base)
  488. #:host (uri-host base)
  489. #:port (uri-port base)
  490. #:path (remove-dot-segments
  491. (if (string-prefix? "/" (uri-path ref))
  492. (uri-path ref)
  493. (merge-paths (uri-path base)
  494. (uri-path ref))))
  495. #:query (uri-query ref)
  496. #:fragment (uri-fragment ref)))))
  497. (define* (http-fetch uri #:key timeout (verify-certificate? #t))
  498. "Return an input port containing the data at URI, and the expected number of
  499. bytes available or #f. When TIMEOUT is true, bail out if the connection could
  500. not be established in less than TIMEOUT seconds. When VERIFY-CERTIFICATE? is
  501. true, verify HTTPS certificates; otherwise simply ignore them."
  502. (define headers
  503. `(;; Some web sites, such as http://dist.schmorp.de, would block you if
  504. ;; there's no 'User-Agent' header, presumably on the assumption that
  505. ;; you're a spammer. So work around that.
  506. (User-Agent . "GNU Guile")
  507. ;; Some servers, such as https://alioth.debian.org, return "406 Not
  508. ;; Acceptable" when not explicitly told that everything is accepted.
  509. (Accept . "*/*")
  510. ;; Basic authentication, if needed.
  511. ,@(match (uri-userinfo uri)
  512. ((? string? str)
  513. `((Authorization . ,(string-append "Basic "
  514. (base64-encode
  515. (string->utf8 str))))))
  516. (_ '()))))
  517. (let*-values (((connection)
  518. (open-connection-for-uri uri
  519. #:timeout timeout
  520. #:verify-certificate?
  521. verify-certificate?))
  522. ((resp port)
  523. (http-get uri #:port connection #:decode-body? #f
  524. #:streaming? #t
  525. #:headers headers))
  526. ((code)
  527. (response-code resp)))
  528. (case code
  529. ((200) ; OK
  530. (values port (response-content-length resp)))
  531. ((301 ; moved permanently
  532. 302 ; found (redirection)
  533. 303 ; see other
  534. 307 ; temporary redirection
  535. 308) ; permanent redirection
  536. (let ((uri (resolve-uri-reference (response-location resp) uri)))
  537. (format #t "following redirection to `~a'...~%"
  538. (uri->string uri))
  539. (close connection)
  540. (http-fetch uri
  541. #:timeout timeout
  542. #:verify-certificate? verify-certificate?)))
  543. (else
  544. (error "download failed" (uri->string uri)
  545. code (response-reason-phrase resp))))))
  546. (define-syntax-rule (false-if-exception* body ...)
  547. "Like `false-if-exception', but print the exception on the error port."
  548. (catch #t
  549. (lambda ()
  550. body ...)
  551. (lambda (key . args)
  552. #f)
  553. (lambda (key . args)
  554. (print-exception (current-error-port) #f key args))))
  555. (define (uri-vicinity dir file)
  556. "Concatenate DIR, slash, and FILE, keeping only one slash in between.
  557. This is required by some HTTP servers."
  558. (string-append (string-trim-right dir #\/) "/"
  559. (string-trim file #\/)))
  560. (define (maybe-expand-mirrors uri mirrors)
  561. "If URI uses the 'mirror' scheme, expand it according to the MIRRORS alist.
  562. Return a list of URIs."
  563. (case (uri-scheme uri)
  564. ((mirror)
  565. (let ((kind (string->symbol (uri-host uri)))
  566. (path (uri-path uri)))
  567. (match (assoc-ref mirrors kind)
  568. ((mirrors ..1)
  569. (map (compose string->uri (cut uri-vicinity <> path))
  570. mirrors))
  571. (_
  572. (error "unsupported URL mirror kind" kind uri)))))
  573. (else
  574. (list uri))))
  575. (define* (disarchive-fetch/any uris file
  576. #:key (timeout 10) (verify-certificate? #t))
  577. "Fetch a Disarchive specification from any of URIS, assemble it,
  578. and write the output to FILE."
  579. (define (fetch-specification uris)
  580. (any (lambda (uri)
  581. (false-if-exception*
  582. (let-values (((port size) (http-fetch uri
  583. #:verify-certificate?
  584. verify-certificate?
  585. #:timeout timeout)))
  586. (format #t "Retrieving Disarchive spec from ~a ...~%"
  587. (uri->string uri))
  588. (let ((specification (read port)))
  589. (close-port port)
  590. specification))))
  591. uris))
  592. (define (resolve addresses output)
  593. (any (match-lambda
  594. (('swhid swhid)
  595. (match (string-split swhid #\:)
  596. (("swh" "1" "dir" id)
  597. (format #t "Downloading ~a from Software Heritage...~%" file)
  598. (false-if-exception*
  599. (swh-download-directory id output)))
  600. (_ #f)))
  601. (_ #f))
  602. addresses))
  603. (format #t "Trying to use Disarchive to assemble ~a...~%" file)
  604. (match (and=> (resolve-module '(disarchive) #:ensure #f)
  605. (lambda (disarchive)
  606. (cons (module-ref disarchive '%disarchive-log-port)
  607. (module-ref disarchive 'disarchive-assemble))))
  608. (#f (format #t "could not load Disarchive~%")
  609. #f)
  610. ((%disarchive-log-port . disarchive-assemble)
  611. (match (fetch-specification uris)
  612. (#f (format #t "could not find its Disarchive specification~%")
  613. #f)
  614. (spec (parameterize ((%disarchive-log-port (current-output-port))
  615. (%verify-swh-certificate? verify-certificate?))
  616. (false-if-exception*
  617. (disarchive-assemble spec file #:resolver resolve))))))))
  618. (define (internet-archive-uri uri)
  619. "Return a URI corresponding to an Internet Archive backup of URI, or #f if
  620. URI does not denote a Web URI."
  621. (and (memq (uri-scheme uri) '(http https))
  622. (let* ((now (time-utc->date (current-time time-utc)))
  623. (date (date->string now "~Y~m~d~H~M~S")))
  624. ;; Note: the date in the URL can be anything and web.archive.org
  625. ;; automatically redirects to the closest date.
  626. (build-uri 'https #:host "web.archive.org"
  627. #:path (string-append "/web/" date "/"
  628. (uri->string uri))))))
  629. (define* (url-fetch url file
  630. #:key
  631. (timeout 10) (verify-certificate? #t)
  632. (mirrors '()) (content-addressed-mirrors '())
  633. (disarchive-mirrors '())
  634. (hashes '())
  635. print-build-trace?)
  636. "Fetch FILE from URL; URL may be either a single string, or a list of
  637. string denoting alternate URLs for FILE. Return #f on failure, and FILE
  638. on success.
  639. When MIRRORS is defined, it must be an alist of mirrors; it is used to resolve
  640. 'mirror://' URIs.
  641. HASHES must be a list of algorithm/hash pairs, where each algorithm is a
  642. symbol such as 'sha256 and each hash is a bytevector.
  643. CONTENT-ADDRESSED-MIRRORS must be a list of procedures that, given a hash
  644. algorithm and a hash, return a URL where the specified data can be retrieved
  645. or #f.
  646. When VERIFY-CERTIFICATE? is true, validate HTTPS server certificates;
  647. otherwise simply ignore them."
  648. (define uri
  649. (append-map (cut maybe-expand-mirrors <> mirrors)
  650. (match url
  651. ((_ ...) (map string->uri url))
  652. (_ (list (string->uri url))))))
  653. (define (fetch uri file)
  654. (format #t "~%Starting download of ~a~%From ~a...~%"
  655. file (uri->string uri))
  656. (case (uri-scheme uri)
  657. ((http https)
  658. (false-if-exception*
  659. (let-values (((port size)
  660. (http-fetch uri
  661. #:verify-certificate? verify-certificate?
  662. #:timeout timeout)))
  663. (call-with-output-file file
  664. (lambda (output)
  665. (dump-port* port output
  666. #:buffer-size %http-receive-buffer-size
  667. #:reporter (if print-build-trace?
  668. (progress-reporter/trace
  669. file (uri->string uri) size)
  670. (progress-reporter/file
  671. (uri-abbreviation uri) size)))
  672. (newline)))
  673. file)))
  674. ((ftp)
  675. (false-if-exception* (ftp-fetch uri file
  676. #:timeout timeout
  677. #:print-build-trace?
  678. print-build-trace?)))
  679. (else
  680. (format #t "skipping URI with unsupported scheme: ~s~%"
  681. uri)
  682. #f)))
  683. (define content-addressed-uris
  684. (append-map (lambda (make-url)
  685. (filter-map (match-lambda
  686. ((hash-algo . hash)
  687. (let ((file (strip-store-file-name file)))
  688. (string->uri (make-url file hash-algo hash)))))
  689. hashes))
  690. content-addressed-mirrors))
  691. (define disarchive-uris
  692. (append-map (lambda (mirror)
  693. (let ((make-url (match mirror
  694. ((? string?)
  695. (lambda (hash-algo hash)
  696. (string-append
  697. mirror
  698. (symbol->string hash-algo) "/"
  699. (bytevector->base16-string hash))))
  700. ((? procedure?)
  701. mirror))))
  702. (map (match-lambda
  703. ((hash-algo . hash)
  704. (string->uri (make-url hash-algo hash))))
  705. hashes)))
  706. disarchive-mirrors))
  707. ;; Make this unbuffered so 'progress-report/file' works as expected. 'line
  708. ;; means '\n', not '\r', so it's not appropriate here.
  709. (setvbuf (current-output-port) 'none)
  710. (setvbuf (current-error-port) 'line)
  711. (let try ((uri (append uri content-addressed-uris
  712. (match uri
  713. ((first . _)
  714. (or (and=> (internet-archive-uri first) list)
  715. '()))
  716. (() '())))))
  717. (match uri
  718. ((uri tail ...)
  719. (or (fetch uri file)
  720. (try tail)))
  721. (()
  722. ;; If we are looking for a software archive, one last thing we
  723. ;; can try is to use Disarchive to assemble it.
  724. (or (disarchive-fetch/any disarchive-uris file
  725. #:verify-certificate? verify-certificate?
  726. #:timeout timeout)
  727. (begin
  728. (format (current-error-port) "failed to download ~s from ~s~%"
  729. file url)
  730. ;; Remove FILE in case we made an incomplete download, for
  731. ;; example due to ENOSPC.
  732. (catch 'system-error
  733. (lambda ()
  734. (delete-file file))
  735. (const #f))
  736. #f))))))
  737. ;;; download.scm ends here