download.scm 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2012-2022 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
  4. ;;; Copyright © 2017 Tobias Geerinckx-Rice <me@tobias.gr>
  5. ;;; Copyright © 2021 Timothy Sample <samplet@ngyro.com>
  6. ;;;
  7. ;;; This file is part of GNU Guix.
  8. ;;;
  9. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  10. ;;; under the terms of the GNU General Public License as published by
  11. ;;; the Free Software Foundation; either version 3 of the License, or (at
  12. ;;; your option) any later version.
  13. ;;;
  14. ;;; GNU Guix is distributed in the hope that it will be useful, but
  15. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. ;;; GNU General Public License for more details.
  18. ;;;
  19. ;;; You should have received a copy of the GNU General Public License
  20. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  21. (define-module (guix build download)
  22. #:use-module (web uri)
  23. #:use-module (web http)
  24. #:use-module ((web client) #:hide (open-socket-for-uri))
  25. #:use-module (web response)
  26. #:use-module (guix base64)
  27. #:use-module (guix ftp-client)
  28. #:use-module (guix build utils)
  29. #:use-module (guix progress)
  30. #:use-module (guix memoization)
  31. #:use-module (rnrs io ports)
  32. #:use-module (rnrs bytevectors)
  33. #:use-module (srfi srfi-1)
  34. #:use-module (srfi srfi-11)
  35. #:use-module (srfi srfi-19)
  36. #:use-module (srfi srfi-26)
  37. #:autoload (ice-9 ftw) (scandir)
  38. #:autoload (guix base16) (bytevector->base16-string)
  39. #:autoload (guix swh) (swh-download-directory %verify-swh-certificate?)
  40. #:use-module (ice-9 match)
  41. #:use-module (ice-9 format)
  42. #:export (open-socket-for-uri
  43. open-connection-for-uri
  44. http-fetch
  45. %x509-certificate-directory
  46. close-connection
  47. resolve-uri-reference
  48. maybe-expand-mirrors
  49. url-fetch
  50. byte-count->string
  51. uri-abbreviation
  52. nar-uri-abbreviation
  53. store-path-abbreviation))
  54. ;;; Commentary:
  55. ;;;
  56. ;;; Fetch data such as tarballs over HTTP or FTP (builder-side code).
  57. ;;;
  58. ;;; Code:
  (define %http-receive-buffer-size
    ;; Number of bytes (64 KiB) used for the HTTP receive buffer; it is also
    ;; the buffer size passed to 'dump-port*' when saving downloads.
    (* 64 1024))
  (define* (ellipsis #:optional (port (current-output-port)))
    "Return the string to use as an ellipsis when writing to PORT: Unicode's
HORIZONTAL ELLIPSIS when PORT's encoding is UTF-8, three ASCII dots otherwise.
This is only a rough guess at what PORT's encoding can represent."
    (match (port-encoding port)
      ("UTF-8" "…")
      (_       "...")))
  (define* (store-path-abbreviation store-path #:optional (prefix-length 6))
    "If STORE-PATH is the file name of a store entry, return an abbreviation of
STORE-PATH for display, showing PREFIX-LENGTH characters of the hash.
Otherwise return STORE-PATH unchanged.

STORE-PATH is also returned unchanged when its base name is too short to
contain a 32-character hash or PREFIX-LENGTH characters (for instance when
STORE-PATH is the store directory itself), instead of raising an out-of-range
error."
    (define hash-length 32)             ;number of characters dropped as the hash
    (if (string-prefix? (%store-directory) store-path)
        (let ((base (basename store-path)))
          ;; Guard 'string-take' and 'string-drop' against short base names.
          (if (and (>= (string-length base) hash-length)
                   (<= 0 prefix-length (string-length base)))
              (string-append (string-take base prefix-length)
                             (ellipsis)
                             (string-drop base hash-length))
              store-path))
        store-path))
  (define* (uri-abbreviation uri #:optional (max-length 42))
    "Return URI as a string.  When that string is longer than MAX-LENGTH, return
instead an abbreviation showing the scheme, host, and base name of the file,
provided the abbreviation is actually shorter."
    (let* ((full        (uri->string uri))
           (full-length (string-length full)))
      (define (abbreviated)
        ;; Build SCHEME://HOST/…/BASENAME, keeping only the second half of
        ;; BASENAME when the result would still exceed MAX-LENGTH.
        (let* ((dots (ellipsis))
               (file (basename (uri-path uri)))
               (head (string-append (symbol->string (uri-scheme uri)) "://"
                                    ;; `file' URIs have no host part.
                                    (or (uri-host uri) "")
                                    "/" dots "/"))
               (tail (if (> (+ (string-length head) (string-length file))
                            max-length)
                         (string-append
                          dots
                          (string-drop file
                                       (quotient (string-length file) 2)))
                         file)))
          (string-append head tail)))

      (cond ((<= full-length max-length)
             full)
            (else
             (let ((candidate (abbreviated)))
               (if (< (string-length candidate) full-length)
                   candidate
                   full))))))
  (define (nar-uri-abbreviation uri)
    "Abbreviate URI, which is assumed to be the URI of a nar as served by Hydra
and 'guix publish', something like
\"http://example.org/nar/1ldrllwbna0aw5z8kpci4fsvbd2w8cw4-texlive-bin-2015\".
URI may be a string or a URI object.  The result is the base name of URI's
path, minus its leading 32-character hash and dash when it has one."
    (define file
      (basename (uri-path (if (string? uri)
                              (string->uri uri)
                              uri))))

    (cond ((<= (string-length file) 33)
           file)
          ((char=? (string-ref file 32) #\-) ;hash followed by a dash
           (string-drop file 33))
          (else
           file)))
  (define* (ftp-fetch uri file #:key timeout print-build-trace?)
    "Fetch data from the FTP URI and write it to FILE.  Return FILE on success.
Bail out if the connection could not be established in less than TIMEOUT
seconds.  When PRINT-BUILD-TRACE? is true, progress is reported with
'progress-reporter/trace' rather than 'progress-reporter/file'."
    (define remote-file
      (uri-path uri))

    (define (open-connection)
      ;; Log in with the user name and password found in URI's userinfo part,
      ;; if any.
      (let ((host (uri-host uri)))
        (match (and=> (uri-userinfo uri)
                      (cut string-split <> #\:))
          (((? string? user))
           (ftp-open host #:timeout timeout
                     #:username user))
          (((? string? user) (? string? password))
           (ftp-open host #:timeout timeout
                     #:username user
                     #:password password))
          (_
           (ftp-open host #:timeout timeout)))))

    (let* ((connection (open-connection))
           ;; A failure to obtain the size is tolerated: it yields #f.
           (total      (false-if-exception
                        (ftp-size connection remote-file)))
           (input      (ftp-retr connection (basename remote-file)
                                 (dirname remote-file)
                                 #:timeout timeout)))
      (call-with-output-file file
        (lambda (output)
          (let ((reporter (if print-build-trace?
                              (progress-reporter/trace
                               file (uri->string uri) total)
                              (progress-reporter/file
                               (uri-abbreviation uri) total))))
            (dump-port* input output
                        #:buffer-size %http-receive-buffer-size
                        #:reporter reporter))))
      (ftp-close connection)
      (if (not print-build-trace?)
          (newline))
      file))
  ;; GnuTLS is loaded lazily so that this module can be used even when GnuTLS
  ;; is not available.  At compile time, this yields "possibly unbound
  ;; variable" warnings, but these are OK: we know that the variables will be
  ;; bound if we need them, because (guix download) adds GnuTLS as an input in
  ;; that case.
  (define (load-gnutls)
    ;; XXX: Use this hack instead of #:autoload to avoid compilation errors.
    ;; See <http://bugs.gnu.org/12202>.
    (let* ((this-module (resolve-module '(guix build download)))
           (gnutls      (resolve-interface '(gnutls))))
      (module-use! this-module gnutls))
    ;; Subsequent calls are no-ops.
    (set! load-gnutls (const #t)))
  (define %x509-certificate-directory
    ;; Parameter holding the name of the directory where X.509 authority PEM
    ;; certificates are stored.  Its initial value comes from the first
    ;; environment variable that is set, with a fixed fallback.
    (make-parameter
     (cond ((getenv "GUIX_TLS_CERTIFICATE_DIRECTORY") => identity)
           ((getenv "SSL_CERT_DIR") => identity)      ;like OpenSSL
           (else "/etc/ssl/certs"))))
  (define (set-certificate-credentials-x509-trust-file!* cred file format)
    "Like 'set-certificate-credentials-x509-trust-file!', but without the file
name decoding bug described at
<https://debbugs.gnu.org/cgi/bugreport.cgi?bug=26948#17>.  The contents of
FILE are read here and handed to CRED as data in FORMAT."
    (set-certificate-credentials-x509-trust-data!
     cred
     (call-with-input-file file
       (lambda (port)
         (get-bytevector-all port)))
     format))
  (define make-credentials-with-ca-trust-files
    (mlambda (directory)
      "Return certificate credentials with X.509 authority certificates read from
DIRECTORY.  Those authority certificates are checked when
'peer-certificate-status' is later called."
      ;; The result is memoized to avoid scanning all the certificates every
      ;; time a connection is made.
      (let* ((scan        (lambda (suffix)
                            (scandir directory
                                     (cut string-suffix? suffix <>))))
             (pem-files   (scan ".pem"))
             (files       (if (or (not pem-files) (null? pem-files))
                              ;; Some distros provide nothing but bundles
                              ;; (*.crt) under /etc/ssl/certs, so look for
                              ;; them.
                              (or (scan ".crt") '())
                              pem-files))
             (credentials (make-certificate-credentials)))
        (for-each (lambda (name)
                    (let ((file (string-append directory "/" name)))
                      ;; Protect against dangling symlinks.
                      (when (file-exists? file)
                        (set-certificate-credentials-x509-trust-file!*
                         credentials file
                         x509-certificate-format/pem))))
                  files)
        credentials)))
  (define (peer-certificate session)
    "Return the certificate of the remote peer in SESSION, that is, the first
element of SESSION's peer certificate chain imported as a DER-encoded X.509
certificate."
    (let ((chain (session-peer-certificate-chain session)))
      (match chain
        ((leaf _ ...)
         (import-x509-certificate leaf x509-certificate-format/der)))))
  (define (assert-valid-server-certificate session server)
    "Return #t if the certificate of the remote peer for SESSION is a valid
certificate for SERVER, where SERVER is the expected host name of peer.
Otherwise throw to 'tls-certificate-error', either with 'host-mismatch or with
'invalid-certificate and the list of certificate statuses."
    (let ((certificate (peer-certificate session)))
      ;; First make sure the server's certificate was issued for SERVER.
      (when (not (x509-certificate-matches-hostname? certificate server))
        (throw 'tls-certificate-error 'host-mismatch certificate server))

      ;; Then check its validity and reachability from the set of authority
      ;; certificates loaded via
      ;; 'set-certificate-credentials-x509-trust-file!'.
      (let ((status (peer-certificate-status session)))
        (match status
          (()                             ;certificate is valid
           #t)
          ((problems ...)
           (throw 'tls-certificate-error 'invalid-certificate
                  certificate server problems))))))
  (define (print-tls-certificate-error port key args default-printer)
    "Print on PORT, in an intelligible way, the TLS certificate error
represented by ARGS, the arguments of a 'tls-certificate-error' exception.
KEY and DEFAULT-PRINTER are ignored."
    (match args
      (('host-mismatch certificate host)
       (let ((distinguished-name (x509-certificate-dn certificate)))
         (format port
                 "X.509 server certificate for '~a' does not match: ~a~%"
                 host distinguished-name)))
      (('invalid-certificate _ host problems)
       (let ((descriptions (map certificate-status->string problems)))
         (format port
                 "X.509 certificate of '~a' could not be verified:~%~{ ~a~%~}"
                 host descriptions)))))

  ;; Have 'tls-certificate-error' exceptions printed by the procedure above.
  (set-exception-printer! 'tls-certificate-error
                          print-tls-certificate-error)
  (define (wrap-record-port-for-gnutls<3.7.7 record port)
    "Return a port that wraps RECORD to ensure that closing it also closes PORT,
the actual socket port, and its file descriptor.  Make sure it does not
introduce extra buffering (custom ports are buffered by default as of Guile
3.0.5).

This wrapper is unnecessary with GnuTLS >= 3.7.7, which can automatically
close SESSION's file descriptor when RECORD is closed."
    (define (read-record! bv start count)
      ;; Read up to COUNT bytes from RECORD into BV at START; return the number
      ;; of bytes read, zero denoting end of file.
      (let ((result
             (catch 'gnutls-error
               (lambda ()
                 (get-bytevector-n! record bv start count))
               (lambda (key err proc . rest)
                 ;; When responding to "Connection: close" requests, some
                 ;; servers close the connection abruptly after sending the
                 ;; response body, without doing a proper TLS connection
                 ;; termination.  Treat it as EOF.  This is fixed in GnuTLS
                 ;; 3.7.7.
                 (if (eq? err error/premature-termination)
                     the-eof-object
                     (apply throw key err proc rest))))))
        (if (eof-object? result)
            0
            result)))

    (define (write-record! bv start count)
      ;; Write COUNT bytes of BV to RECORD right away.
      (put-bytevector record bv start count)
      (force-output record)
      count)

    (define (close-both)
      ;; Close the socket first, then the record port.
      (unless (port-closed? port)
        (close-port port))
      (unless (port-closed? record)
        (close-port record)))

    (let ((wrapper (make-custom-binary-input/output-port
                    "gnutls wrapped port"
                    read-record! write-record!
                    (lambda ()
                      (port-position record))
                    (lambda (new-position)
                      (set-port-position! record new-position))
                    close-both)))
      ;; Do not add a layer of buffering on top of RECORD.
      (setvbuf wrapper 'none)
      wrapper))
  (define* (tls-wrap port server #:key (verify-certificate? #t))
    "Return PORT wrapped in a TLS connection to SERVER.  SERVER must be a DNS
host name without trailing dot.  When VERIFY-CERTIFICATE? is true, the server's
certificate is checked against SERVER and against the authority certificates
found in the '%x509-certificate-directory' directory; PORT is closed and the
error re-thrown when that check fails."
    (define (log level str)
      ;; Logging procedure for 'set-log-procedure!'; see the debugging note
      ;; below.
      (format (current-error-port)
              "gnutls: [~a|~a] ~a" (getpid) level str))

    (define (gnutls-provides? name)
      ;; Return true if the (gnutls) module in use defines NAME.
      (module-defined? (resolve-interface '(gnutls)) name))

    (define (handshake/retry session)
      ;; Perform the TLS handshake for SESSION, retrying up to 5 times upon
      ;; non-fatal errors.
      (let loop ((retries 5))
        (catch 'gnutls-error
          (lambda ()
            (handshake session))
          (lambda (key err proc . rest)
            (cond ((eq? err error/warning-alert-received)
                   ;; Like Wget, do no stop upon non-fatal alerts such as
                   ;; 'alert-description/unrecognized-name'.
                   (format (current-error-port)
                           "warning: TLS warning alert received: ~a~%"
                           (alert-description->string (alert-get session)))
                   (handshake session))
                  ((or (fatal-error? err) (zero? retries))
                   (apply throw key err proc rest))
                  (else
                   ;; We got 'error/again' or similar; try again.
                   (format (current-error-port)
                           "warning: TLS non-fatal error: ~a~%"
                           (error->string err))
                   (loop (- retries 1))))))))

    ;; The GnuTLS bindings used below are only available past this point.
    (load-gnutls)

    (let ((session  (make-session connection-end/client))
          (ca-certs (%x509-certificate-directory)))
      ;; Some servers such as 'cloud.github.com' require the client to support
      ;; the 'SERVER NAME' extension.  However, 'set-session-server-name!' is
      ;; not available in older GnuTLS releases.  See
      ;; <http://bugs.gnu.org/18526> for details.
      (if (gnutls-provides? 'set-session-server-name!)
          (set-session-server-name! session server-name-type/dns server)
          (format (current-error-port)
                  "warning: TLS 'SERVER NAME' extension not supported~%"))

      (set-session-transport-fd! session (fileno port))
      (set-session-default-priority! session)

      ;; The "%COMPAT" bit allows us to work around firewall issues (info
      ;; "(gnutls) Priority Strings"); see <http://bugs.gnu.org/23311>.
      ;; Explicitly disable SSLv3, which is insecure:
      ;; <https://tools.ietf.org/html/rfc7568>.
      (set-session-priorities! session "NORMAL:%COMPAT:-VERS-SSL3.0")

      (set-session-credentials! session
                                (if (and verify-certificate? ca-certs)
                                    (make-credentials-with-ca-trust-files
                                     ca-certs)
                                    (make-certificate-credentials)))

      ;; Uncomment the following lines in case of debugging emergency.
      ;;(set-log-level! 10)
      ;;(set-log-procedure! log)

      (handshake/retry session)

      ;; Verify the server's certificate if needed.
      (when verify-certificate?
        (catch 'tls-certificate-error
          (lambda ()
            (assert-valid-server-certificate session server))
          (lambda args
            (close-port port)
            (apply throw args))))

      (let ((record (session-record-port session)))
        (setvbuf record 'block)
        (cond ((gnutls-provides? 'set-session-record-port-close!)
               ;; GnuTLS >= 3.7.7: have RECORD close PORT when it is closed.
               (set-session-record-port-close! record
                                               (lambda (_)
                                                 (close-port port)))
               record)
              (else
               (wrap-record-port-for-gnutls<3.7.7 record port))))))
  (define (ensure-uri uri-or-string)        ;XXX: copied from (web http)
    "Return URI-OR-STRING as a URI object: a string is parsed with
'string->uri', a URI object is returned as is, and anything else is an error."
    (match uri-or-string
      ((? string? str) (string->uri str))
      ((? uri? uri)    uri)
      (_               (error "Invalid URI" uri-or-string))))
  (define* (open-socket-for-uri uri-or-string #:key timeout)
    "Return an open input/output port for a connection to URI.  When TIMEOUT is
not #f, it must be a (possibly inexact) number denoting the maximum duration
in seconds to wait for the connection to complete; passed TIMEOUT, an
ETIMEDOUT error is raised.  When 'current-http-proxy' is set, the connection
is made to the proxy instead."
    ;; Includes a fix for <http://bugs.gnu.org/15368> which affects Guile's
    ;; 'open-socket-for-uri' up to 2.0.11 included, uses 'connect*' instead
    ;; of 'connect', and uses AI_ADDRCONFIG.
    (define http-proxy (current-http-proxy))
    (define uri (ensure-uri (or http-proxy uri-or-string)))

    (define (same-address? ai1 ai2)
      (equal? (addrinfo:addr ai1) (addrinfo:addr ai2)))

    (define addresses
      ;; Candidate addresses for URI's host, without duplicates.
      (let* ((port    (uri-port uri))
             (service (if port
                          (number->string port)
                          (symbol->string (uri-scheme uri))))
             (hints   (if (number? port)
                          (logior AI_ADDRCONFIG AI_NUMERICSERV)
                          AI_ADDRCONFIG)))
        (delete-duplicates (getaddrinfo (uri-host uri) service hints)
                           same-address?)))

    (let loop ((remaining addresses))
      (let* ((ai     (car remaining))
             (others (cdr remaining))
             (sock   (with-fluids ((%default-port-encoding #f))
                       ;; Restrict ourselves to TCP.
                       (socket (addrinfo:fam ai) SOCK_STREAM IPPROTO_IP))))
        (catch 'system-error
          (lambda ()
            (connect* sock (addrinfo:addr ai) timeout)

            ;; Buffer input and output on this port.
            (setvbuf sock 'block)

            ;; If we're using a proxy, make a note of that.
            (when http-proxy
              (set-http-proxy-port?! sock #t))
            sock)
          (lambda args
            ;; Connection failed, so try one of the other addresses, or
            ;; re-throw when none is left.
            (close sock)
            (if (null? others)
                (apply throw args)
                (loop others)))))))
  (define (setup-http-tunnel port uri)
    "Establish over PORT an HTTP tunnel to the destination server of URI: send a
CONNECT request for URI's host and port (80 for 'http' and 443 for 'https'
when URI has no explicit port), and return the response read from PORT."
    (let* ((port-number (or (uri-port uri)
                            (match (uri-scheme uri)
                              ('http 80)
                              ('https 443))))
           (target      (string-append (uri-host uri) ":"
                                       (number->string port-number))))
      (format port "CONNECT ~a HTTP/1.1\r\n" target)
      (format port "Host: ~a\r\n\r\n" target)
      (force-output port)
      (read-response port)))
  (define* (open-connection-for-uri uri
                                    #:key
                                    timeout
                                    (verify-certificate? #t))
    "Like 'open-socket-for-uri', but also handle HTTPS connections.  When
VERIFY-CERTIFICATE? is true, verify HTTPS server certificates.  For HTTPS
URIs, the proxy is taken from the 'https_proxy' environment variable."
    ;; Note: Guile 2.2.0's (web client) has a same-named export that's actually
    ;; undefined.  See Guile commit 011669af3b428e5626f7bbf66b11d57d9768c047.
    (define https?
      (eq? 'https (uri-scheme uri)))

    (define https-proxy
      ;; An empty 'https_proxy' is treated like an unset one.
      (match (getenv "https_proxy")
        ("" #f)
        (proxy proxy)))

    (define (call-with-https-proxy thunk)
      ;; For HTTPS URIs, honor 'https_proxy', not 'http_proxy'.
      (if (and https?
               (module-variable
                (resolve-interface '(web client))
                'current-http-proxy))
          (parameterize ((current-http-proxy https-proxy))
            (thunk))
          (thunk)))

    (call-with-https-proxy
     (lambda ()
       (let ((socket (open-socket-for-uri uri #:timeout timeout)))
         ;; Buffer input and output on this port.
         (setvbuf socket 'block %http-receive-buffer-size)

         (when (and https? https-proxy)
           (setup-http-tunnel socket uri))

         (if https?
             (tls-wrap socket (uri-host uri)
                       #:verify-certificate? verify-certificate?)
             socket)))))
  (define (close-connection port)             ;deprecated
    "Close PORT unless it is already closed."
    (if (not (port-closed? port))
        (close-port port)))
  ;; XXX: This is an awful hack: 'set-port-encoding!' is replaced, within
  ;; (web response), by a procedure that does nothing and returns #f, so that
  ;; the (set-port-encoding! p "ISO-8859-1") call in `read-response' passes,
  ;; even during bootstrap where iconv is not available.
  (let ((web-response (resolve-module '(web response))))
    (module-define! web-response
                    'set-port-encoding!
                    (lambda (port encoding) #f)))
  (define (resolve-uri-reference ref base)
    "Resolve the URI reference REF, interpreted relative to the BASE URI, into a
target URI, according to the algorithm specified in RFC 3986 section 5.2.2.
Return the resulting target URI."
    (define (merge-paths base-path rel-path)
      ;; Replace the last component of BASE-PATH with REL-PATH.
      (let ((directory (drop-right (string-split base-path #\/) 1)))
        (string-append (string-join directory "/") "/" rel-path)))

    (define (remove-dot-segments path)
      (define (dot-segment? component)
        (member component '("." "..")))

      (define (push component stack)
        ;; STACK holds the components kept so far, most recent first.
        (cond ((string=? component ".")
               stack)
              ((string=? component "..")
               (if (or (null? stack) (equal? stack '("")))
                   (error "remove-dot-segments: too many '..' components" path)
                   (cdr stack)))
              (else
               (cons component stack))))

      (string-join
       (reverse
        (fold push '()
              ;; Drop leading "." and ".." components from a relative path.
              ;; (absolute paths will start with a "" component)
              (drop-while dot-segment? (string-split path #\/))))
       "/"))

    (define (relative-to-base path query)
      ;; Build a URI with BASE's scheme and authority, the given PATH and
      ;; QUERY, and REF's fragment.
      (build-uri (uri-scheme base)
                 #:userinfo (uri-userinfo base)
                 #:host (uri-host base)
                 #:port (uri-port base)
                 #:path path
                 #:query query
                 #:fragment (uri-fragment ref)))

    (let ((ref-path (uri-path ref)))
      (cond ((or (uri-scheme ref) (uri-host ref))
             ;; REF carries its own authority: only the scheme may come from
             ;; BASE.
             (build-uri (or (uri-scheme ref) (uri-scheme base))
                        #:userinfo (uri-userinfo ref)
                        #:host (uri-host ref)
                        #:port (uri-port ref)
                        #:path (remove-dot-segments ref-path)
                        #:query (uri-query ref)
                        #:fragment (uri-fragment ref)))
            ((string-null? ref-path)
             ;; Same document as BASE, possibly with another query.
             (relative-to-base (remove-dot-segments (uri-path base))
                               (or (uri-query ref) (uri-query base))))
            (else
             (relative-to-base (remove-dot-segments
                                (if (string-prefix? "/" ref-path)
                                    ref-path
                                    (merge-paths (uri-path base) ref-path)))
                               (uri-query ref))))))
  (define* (http-fetch uri #:key timeout (verify-certificate? #t))
    "Return an input port containing the data at URI, and the expected number of
bytes available or #f.  When TIMEOUT is true, bail out if the connection could
not be established in less than TIMEOUT seconds.  When VERIFY-CERTIFICATE? is
true, verify HTTPS certificates; otherwise simply ignore them.

Redirections (301, 302, 303, 307, and 308) are followed.  For any other status
code other than 200, and for redirections that lack a 'Location' header, the
connection is closed and a \"download failed\" error is raised."
    (define headers
      `(;; Some web sites, such as http://dist.schmorp.de, would block you if
        ;; there's no 'User-Agent' header, presumably on the assumption that
        ;; you're a spammer.  So work around that.
        (User-Agent . "GNU Guile")

        ;; Some servers, such as https://alioth.debian.org, return "406 Not
        ;; Acceptable" when not explicitly told that everything is accepted.
        (Accept . "*/*")

        ;; Basic authentication, if needed.
        ,@(match (uri-userinfo uri)
            ((? string? str)
             `((Authorization . ,(string-append "Basic "
                                                (base64-encode
                                                 (string->utf8 str))))))
            (_ '()))))

    (define (download-failed connection resp)
      ;; Release CONNECTION before raising the error so that failed attempts
      ;; do not leak sockets; a failure to close must not mask the real error.
      (false-if-exception (close-port connection))
      (error "download failed" (uri->string uri)
             (response-code resp) (response-reason-phrase resp)))

    (let*-values (((connection)
                   (open-connection-for-uri uri
                                            #:timeout timeout
                                            #:verify-certificate?
                                            verify-certificate?))
                  ((resp port)
                   (http-get uri #:port connection #:decode-body? #f
                             #:streaming? #t
                             #:headers headers))
                  ((code)
                   (response-code resp)))
      (case code
        ((200)                                      ; OK
         (values port (response-content-length resp)))
        ((301                                       ; moved permanently
          302                                       ; found (redirection)
          303                                       ; see other
          307                                       ; temporary redirection
          308)                                      ; permanent redirection
         (match (response-location resp)
           (#f
            ;; A redirection without a 'Location' header cannot be followed.
            (download-failed connection resp))
           (location
            (let ((uri (resolve-uri-reference location uri)))
              (format #t "following redirection to `~a'...~%"
                      (uri->string uri))
              (close connection)
              (http-fetch uri
                          #:timeout timeout
                          #:verify-certificate? verify-certificate?)))))
        (else
         (download-failed connection resp)))))
  (define-syntax-rule (false-if-exception* body ...)
    "Evaluate BODY... and return its value.  If an exception is thrown, print it
on the current error port and return #f, like `false-if-exception' but without
silencing the error."
    (catch #t
      (lambda ()
        body ...)
      ;; Handler: the result is #f whatever the exception.
      (lambda (tag . rest)
        #f)
      ;; Pre-unwind handler: report the exception before the stack unwinds.
      (lambda (tag . rest)
        (print-exception (current-error-port) #f tag rest))))
  (define (uri-vicinity dir file)
    "Return DIR and FILE joined by exactly one slash: trailing slashes of DIR and
leading slashes of FILE are discarded first.  This is required by some HTTP
servers."
    (let ((directory (string-trim-right dir #\/))
          (relative  (string-trim file #\/)))
      (string-join (list directory relative) "/")))
  (define (maybe-expand-mirrors uri mirrors)
    "If URI uses the 'mirror' scheme, expand it according to the MIRRORS alist,
whose keys are mirror kinds (symbols) and whose values are non-empty lists of
base URLs.  Return a list of URIs; for any other scheme, that list contains
just URI.  An error is raised for an unknown mirror kind."
    (if (eq? (uri-scheme uri) 'mirror)
        (let* ((kind (string->symbol (uri-host uri)))
               (path (uri-path uri)))
          (match (assoc-ref mirrors kind)
            ((base-urls ..1)
             (map (lambda (base-url)
                    (string->uri (uri-vicinity base-url path)))
                  base-urls))
            (_
             (error "unsupported URL mirror kind" kind uri))))
        (list uri)))
  (define* (disarchive-fetch/any uris file
                                 #:key (timeout 10) (verify-certificate? #t))
    "Fetch a Disarchive specification from any of URIS, assemble it,
and write the output to FILE.  Return #f when the (disarchive) module cannot be
loaded, when no specification could be fetched, or when assembly fails.
TIMEOUT and VERIFY-CERTIFICATE? are passed to 'http-fetch'."
    (define (read-specification uri)
      ;; Return the specification read from URI, or #f upon failure.
      (false-if-exception*
       (call-with-values
           (lambda ()
             (http-fetch uri
                         #:verify-certificate? verify-certificate?
                         #:timeout timeout))
         (lambda (port size)
           (format #t "Retrieving Disarchive spec from ~a ...~%"
                   (uri->string uri))
           (let ((specification (read port)))
             (close-port port)
             specification)))))

    (define (resolve addresses output)
      ;; Resolver passed to 'disarchive-assemble': download to OUTPUT the
      ;; first of ADDRESSES that is a Software Heritage directory identifier.
      (any (lambda (address)
             (match address
               (('swhid swhid)
                (match (string-split swhid #\:)
                  (("swh" "1" "dir" id)
                   (format #t "Downloading ~a from Software Heritage...~%" file)
                   (false-if-exception*
                    (swh-download-directory id output)))
                  (_ #f)))
               (_ #f)))
           addresses))

    (format #t "Trying to use Disarchive to assemble ~a...~%" file)
    (let ((disarchive (resolve-module '(disarchive) #:ensure #f)))
      (cond ((not disarchive)
             (format #t "could not load Disarchive~%")
             #f)
            (else
             ;; Look up the Disarchive bindings at run time.
             (let* ((log-port (module-ref disarchive '%disarchive-log-port))
                    (assemble (module-ref disarchive 'disarchive-assemble))
                    (spec     (any read-specification uris)))
               (if spec
                   (parameterize ((log-port (current-output-port))
                                  (%verify-swh-certificate? verify-certificate?))
                     (false-if-exception*
                      (assemble spec file #:resolver resolve)))
                   (begin
                     (format #t "could not find its Disarchive specification~%")
                     #f)))))))
  (define (internet-archive-uri uri)
    "Return a URI corresponding to an Internet Archive backup of URI, or #f if
URI does not denote a Web URI, i.e., if its scheme is neither 'http' nor
'https'."
    (case (uri-scheme uri)
      ((http https)
       ;; Note: the date in the URL can be anything and web.archive.org
       ;; automatically redirects to the closest date.
       (let ((timestamp (date->string (time-utc->date (current-time time-utc))
                                      "~Y~m~d~H~M~S")))
         (build-uri 'https #:host "web.archive.org"
                    #:path (string-append "/web/" timestamp "/"
                                          (uri->string uri)))))
      (else #f)))
  (define* (url-fetch url file
                      #:key
                      (timeout 10) (verify-certificate? #t)
                      (mirrors '()) (content-addressed-mirrors '())
                      (disarchive-mirrors '())
                      (hashes '())
                      print-build-trace?)
    "Fetch FILE from URL; URL may be either a single string, or a list of
strings denoting alternate URLs for FILE.  Return #f on failure, and FILE
on success.

When MIRRORS is defined, it must be an alist of mirrors; it is used to resolve
'mirror://' URIs.

HASHES must be a list of algorithm/hash pairs, where each algorithm is a
symbol such as 'sha256 and each hash is a bytevector.
CONTENT-ADDRESSED-MIRRORS must be a list of procedures that, given a file
name, a hash algorithm, and a hash, return a URL where the specified data can
be retrieved or #f; mirrors that return #f are skipped.

DISARCHIVE-MIRRORS must be a list whose elements are either URL prefix
strings, to which the hash algorithm, a slash, and the base16-encoded hash are
appended, or procedures that, given a hash algorithm and a hash, return a URL.
They are used as a last resort to locate a Disarchive specification of FILE.

TIMEOUT is passed to the HTTP and FTP fetchers.  When PRINT-BUILD-TRACE? is
true, progress is reported with 'progress-reporter/trace' rather than
'progress-reporter/file'.

When VERIFY-CERTIFICATE? is true, validate HTTPS server certificates;
otherwise simply ignore them."
    (define uri
      (append-map (cut maybe-expand-mirrors <> mirrors)
                  (match url
                    ((_ ...) (map string->uri url))
                    (_       (list (string->uri url))))))

    (define (fetch uri file)
      ;; Download URI to FILE; return FILE on success and #f on failure.
      (format #t "~%Starting download of ~a~%From ~a...~%"
              file (uri->string uri))
      (case (uri-scheme uri)
        ((http https)
         (false-if-exception*
          (let-values (((port size)
                        (http-fetch uri
                                    #:verify-certificate? verify-certificate?
                                    #:timeout timeout)))
            (call-with-output-file file
              (lambda (output)
                (dump-port* port output
                            #:buffer-size %http-receive-buffer-size
                            #:reporter (if print-build-trace?
                                           (progress-reporter/trace
                                            file (uri->string uri) size)
                                           (progress-reporter/file
                                            (uri-abbreviation uri) size)))
                (newline)))
            (close-port port)
            file)))
        ((ftp)
         (false-if-exception* (ftp-fetch uri file
                                         #:timeout timeout
                                         #:print-build-trace?
                                         print-build-trace?)))
        (else
         (format #t "skipping URI with unsupported scheme: ~s~%"
                 uri)
         #f)))

    (define content-addressed-uris
      (append-map (lambda (make-url)
                    (filter-map (match-lambda
                                  ((hash-algo . hash)
                                   (let ((file (strip-store-file-name file)))
                                     ;; MAKE-URL may return #f when it has no
                                     ;; URL for this hash; 'filter-map' then
                                     ;; drops the entry instead of passing #f
                                     ;; to 'string->uri'.
                                     (and=> (make-url file hash-algo hash)
                                            string->uri))))
                                hashes))
                  content-addressed-mirrors))

    (define disarchive-uris
      (append-map (lambda (mirror)
                    (let ((make-url (match mirror
                                      ((? string?)
                                       (lambda (hash-algo hash)
                                         (string-append
                                          mirror
                                          (symbol->string hash-algo) "/"
                                          (bytevector->base16-string hash))))
                                      ((? procedure?)
                                       mirror))))
                      (map (match-lambda
                             ((hash-algo . hash)
                              (string->uri (make-url hash-algo hash))))
                           hashes)))
                  disarchive-mirrors))

    ;; Make this unbuffered so 'progress-reporter/file' works as expected.
    ;; 'line means '\n', not '\r', so it's not appropriate here.
    (setvbuf (current-output-port) 'none)

    (setvbuf (current-error-port) 'line)

    ;; Try, in order: the given URLs, the content-addressed mirrors, and the
    ;; Internet Archive backup of the first URL.
    (let try ((uri (append uri content-addressed-uris
                           (match uri
                             ((first . _)
                              (or (and=> (internet-archive-uri first) list)
                                  '()))
                             (() '())))))
      (match uri
        ((uri tail ...)
         (or (fetch uri file)
             (try tail)))
        (()
         ;; If we are looking for a software archive, one last thing we
         ;; can try is to use Disarchive to assemble it.
         (or (disarchive-fetch/any disarchive-uris file
                                   #:verify-certificate? verify-certificate?
                                   #:timeout timeout)
             (begin
               (format (current-error-port) "failed to download ~s from ~s~%"
                       file url)
               ;; Remove FILE in case we made an incomplete download, for
               ;; example due to ENOSPC.
               (catch 'system-error
                 (lambda ()
                   (delete-file file))
                 (const #f))
               #f))))))
  743. ;;; download.scm ends here