swh.scm 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
  4. ;;; Copyright © 2021 Xinglu Chen <public@yoctocell.xyz>
  5. ;;;
  6. ;;; This file is part of GNU Guix.
  7. ;;;
  8. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  9. ;;; under the terms of the GNU General Public License as published by
  10. ;;; the Free Software Foundation; either version 3 of the License, or (at
  11. ;;; your option) any later version.
  12. ;;;
  13. ;;; GNU Guix is distributed in the hope that it will be useful, but
  14. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. ;;; GNU General Public License for more details.
  17. ;;;
  18. ;;; You should have received a copy of the GNU General Public License
  19. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  20. (define-module (guix swh)
  21. #:use-module (guix base16)
  22. #:use-module (guix build utils)
  23. #:use-module ((guix build syscalls) #:select (mkdtemp!))
  24. #:use-module (web uri)
  25. #:use-module (web client)
  26. #:use-module (web response)
  27. #:use-module (json)
  28. #:use-module (srfi srfi-1)
  29. #:use-module (srfi srfi-9)
  30. #:use-module (srfi srfi-11)
  31. #:use-module (srfi srfi-19)
  32. #:use-module (ice-9 match)
  33. #:use-module (ice-9 regex)
  34. #:use-module (ice-9 popen)
  35. #:use-module ((ice-9 ftw) #:select (scandir))
  36. #:export (%swh-base-url
  37. %verify-swh-certificate?
  38. %allow-request?
  39. request-rate-limit-reached?
  40. origin?
  41. origin-type
  42. origin-url
  43. origin-visits
  44. lookup-origin
  45. visit?
  46. visit-date
  47. visit-origin
  48. visit-url
  49. visit-snapshot-url
  50. visit-status
  51. visit-number
  52. visit-snapshot
  53. snapshot?
  54. snapshot-id
  55. snapshot-branches
  56. lookup-snapshot-branch
  57. branch?
  58. branch-name
  59. branch-target
  60. release?
  61. release-id
  62. release-name
  63. release-message
  64. release-target
  65. revision?
  66. revision-id
  67. revision-date
  68. revision-directory
  69. lookup-revision
  70. lookup-origin-revision
  71. content?
  72. content-checksums
  73. content-data-url
  74. content-length
  75. lookup-content
  76. directory-entry?
  77. directory-entry-name
  78. directory-entry-type
  79. directory-entry-checksums
  80. directory-entry-length
  81. directory-entry-permissions
  82. lookup-directory
  83. directory-entry-target
  84. save-reply?
  85. save-reply-origin-url
  86. save-reply-origin-type
  87. save-reply-request-date
  88. save-reply-request-status
  89. save-reply-task-status
  90. save-origin
  91. save-origin-status
  92. vault-reply?
  93. vault-reply-id
  94. vault-reply-fetch-url
  95. vault-reply-progress-message
  96. vault-reply-status
  97. vault-reply-swhid
  98. query-vault
  99. request-cooking
  100. vault-fetch
  101. commit-id?
  102. swh-download-directory
  103. swh-download))
  104. ;;; Commentary:
  105. ;;;
  106. ;;; This module provides bindings to the HTTP interface of Software Heritage.
  107. ;;; It allows you to browse the archive, look up revisions (such as SHA1
  108. ;;; commit IDs), "origins" (code hosting URLs), content (files), etc. See
  109. ;;; <https://archive.softwareheritage.org/api/> for more information.
  110. ;;;
  111. ;;; The high-level 'swh-download' procedure allows you to download a Git
  112. ;;; revision from Software Heritage, provided it is available.
  113. ;;;
  114. ;;; Code:
  ;; Root URL of the Software Heritage archive, to which relative API paths are
  ;; appended.  It is a parameter, so it can be rebound with 'parameterize',
  ;; though presumably it won't need to change.
  (define %swh-base-url
    (make-parameter
     "https://archive.softwareheritage.org"))
  ;; Parameter telling whether the X.509 certificate presented by the host of
  ;; %SWH-BASE-URL must be verified for HTTPS requests.  Defaults to true.
  (define %verify-swh-certificate?
    (make-parameter
     #t))
  (define (swh-url path . rest)
    ;; Build the URL made of PATH followed by each component of REST, separated
    ;; by slashes, and terminated by a slash.
    ;;
    ;; URLs returned by the API may be relative or absolute, and this has
    ;; changed without notice before.  Handle both cases: a PATH that starts
    ;; with a slash is taken to be relative to %SWH-BASE-URL, anything else is
    ;; used as is.
    (let* ((base   (if (string-prefix? "/" path)
                       (string-append (%swh-base-url) path)
                       path))
           (joined (string-append base (string-join rest "/" 'prefix))))
      ;; Ensure there's a trailing slash or we get a redirect.
      (if (string-suffix? "/" joined)
          joined
          (string-append joined "/"))))
  ;; XXX: 'http-get*' and 'http-post*' go through 'http-request' directly to
  ;; work around a bug in Guile 3.0.2 where #:verify-certificate? would be
  ;; ignored (<https://bugs.gnu.org/40486>).
  (define (http-get* uri . rest)
    ;; Perform a GET request on URI; REST is passed as is to 'http-request'.
    (apply http-request uri #:method 'GET rest))
  (define (http-post* uri . rest)
    ;; Perform a POST request on URI; REST is passed as is to 'http-request'.
    (apply http-request uri #:method 'POST rest))
  (define %date-regexp
    ;; Match strings like "2014-11-17T22:09:38+01:00" or
    ;; "2018-09-30T23:20:07.815449+00:00".
    ;;
    ;; Groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 seconds,
    ;; 8 optional fractional seconds (including the leading dot),
    ;; 9 signed time zone hours, 10 time zone minutes.
    (make-regexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})((\\.[0-9]+)?)([+-][0-9]{2}):([0-9]{2})$"))

  (define (string->date* str)
    "Return a SRFI-19 date parsed from STR, a date string as returned by
  Software Heritage.  When STR does not have the expected format, return STR
  unchanged."
    ;; We can't use 'string->date' because of the timezone format: SWH returns
    ;; "+01:00" when the '~z' template expects "+0100".  So we roll our own!
    (define (fraction->nanoseconds fraction)
      ;; FRACTION is a fraction of a second such as ".815449".  Convert it to a
      ;; number of nanoseconds by padding or truncating it to nine digits.
      (let* ((digits (string-drop fraction 1))
             (count  (string-length digits)))
        (string->number
         (if (>= count 9)
             (string-take digits 9)
             (string-append digits (make-string (- 9 count) #\0))))))

    (or (and=> (regexp-exec %date-regexp str)
               (lambda (match)
                 (define (ref n)
                   (string->number (match:substring match n)))

                 (define zone-offset
                   ;; Offset from UTC in seconds.  The sign is read from the
                   ;; text rather than from the numeric value of the hours so
                   ;; that offsets such as "-00:30" come out negative.
                   (let ((sign (if (string-prefix? "-" (match:substring match 9))
                                   -1
                                   1)))
                     (* sign
                        (+ (* 3600 (abs (ref 9)))
                           (* 60 (ref 10))))))

                 (make-date (let ((fraction (match:substring match 8)))
                              (if fraction
                                  (fraction->nanoseconds fraction)
                                  0))
                            (ref 6) (ref 5) (ref 4)
                            (ref 3) (ref 2) (ref 1)
                            zone-offset)))
        str))                                       ;oops!
  (define (maybe-null proc)
    ;; Return a one-argument procedure that returns #f when passed the empty
    ;; list or the symbol 'null', and otherwise the result of applying PROC to
    ;; its argument.
    (lambda (obj)
      (match obj
        ((or 'null (? null?)) #f)
        (_ (proc obj)))))
  (define (string* obj)
    ;; Convert a "string or null" value coming from JSON to "string or #f".
    (match obj
      ((or 'null                                    ;Guile-JSON 4.x
           (? null?))                               ;Guile-JSON 3.x
       #f)
      ((? string?) obj)))
  ;; Parameter holding a procedure that is called with a URL and a method
  ;; (e.g., the 'http-get' procedure) before each request; the request is
  ;; performed only if it returns true.  This can be used to disallow requests
  ;; when 'request-rate-limit-reached?' returns true, for instance.  The default
  ;; procedure allows everything.
  (define %allow-request?
    (make-parameter (lambda _ #t)))
  ;; Times at which the rate limit for "/origin/save" POST requests, and that
  ;; of all the other requests, will be reset.  They are compared against the
  ;; seconds returned by 'gettimeofday'.
  ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
  (define %save-rate-limit-reset-time 0)
  (define %general-rate-limit-reset-time 0)

  (define (request-rate-limit-reached? url method)
    "Return true if the rate limit that applies to METHOD requests to URL was
  reached and its reset time has not passed yet."
    (let* ((uri   (string->uri url))
           (save? (and (eq? method http-post*)
                       (string-prefix? "/api/1/origin/save/"
                                       (uri-path uri))))
           (reset (if save?
                      %save-rate-limit-reset-time
                      %general-rate-limit-reset-time))
           (now   (car (gettimeofday))))
      (> reset now)))
  (define (update-rate-limit-reset-time! url method response)
    "Update the rate limit reset time for URL and METHOD based on the headers in
  RESPONSE.  Do nothing and return #f when RESPONSE lacks a numeric
  'X-RateLimit-Reset' header."
    (let* ((uri    (string->uri url))
           (header (assq-ref (response-headers response) 'x-ratelimit-reset))
           ;; The header may be missing, in which case 'assq-ref' returns #f;
           ;; only call 'string->number' on an actual string.
           (reset  (and (string? header)
                        (string->number header))))
      (if reset
          ;; POST requests to "/origin/save" have their own rate limit.
          (if (and (eq? method http-post*)
                   (string-prefix? "/api/1/origin/save/" (uri-path uri)))
              (set! %save-rate-limit-reset-time reset)
              (set! %general-rate-limit-reset-time reset))
          #f)))
  (define* (call url decode #:optional (method http-get*)
                 #:key (false-if-404? #t))
    "Invoke the endpoint at URL using METHOD.  Decode the resulting JSON body
  using DECODE, a one-argument procedure that takes an input port.  When
  FALSE-IF-404? is true, return #f upon 404 responses.  Return #f without
  performing any request when the '%allow-request?' procedure returns false.
  Throw to 'swh-error, with URL, METHOD, and the response as arguments, when the
  response reports that no requests remain before the rate limit, or upon any
  other response code."
    (and ((%allow-request?) url method)
         (let*-values (((response port)
                        (method url #:streaming? #t
                                #:verify-certificate?
                                (%verify-swh-certificate?))))
           ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
           (let* ((header    (assq-ref (response-headers response)
                                       'x-ratelimit-remaining))
                  ;; The header may be missing or, in principle, not a number;
                  ;; treat both cases as "no rate limit information".
                  (remaining (and (string? header)
                                  (string->number header))))
             (when (and remaining (zero? remaining))
               (update-rate-limit-reset-time! url method response)
               ;; Close the port like every other exit path does.
               (close-port port)
               (throw 'swh-error url method response)))
           (let ((code (response-code response)))
             (cond ((= 200 code)
                    (let ((result (decode port)))
                      (close-port port)
                      result))
                   ((and false-if-404? (= 404 code))
                    (close-port port)
                    #f)
                   (else
                    (close-port port)
                    (throw 'swh-error url method response)))))))
  (define-syntax define-query
    ;; Usage: (define-query (NAME ARGS ...) DOCSTRING (path COMPONENTS ...) DECODE)
    ;; This expands to a procedure NAME that passes COMPONENTS to 'swh-url' and
    ;; hands the resulting URL, along with DECODE, to 'call'.
    (syntax-rules (path)
      "Define a procedure that performs a Software Heritage query."
      ((_ (proc formals ...) doc (path segments ...)
          decoder)
       (define (proc formals ...)
         doc
         (call (swh-url segments ...) decoder)))))
  244. ;; <https://archive.softwareheritage.org/api/1/origin/https://github.com/guix-mirror/guix/get>
  ;; Record for the JSON object describing an origin; 'json->origin' builds it
  ;; from the "origin_visits_url", "type", and "url" keys.
  (define-json-mapping <origin> make-origin origin?
    json->origin
    (visits-url origin-visits-url "origin_visits_url")
    (type       origin-type       "type")
    (url        origin-url        "url"))
  250. ;; <https://archive.softwareheritage.org/api/1/origin/52181937/visits/>
  ;; Record for one visit of an origin, as found in the list of visits.
  (define-json-mapping <visit> make-visit visit?
    json->visit
    (date         visit-date         "date" string->date*)
    (origin       visit-origin       "origin")
    (url          visit-url          "origin_visit_url")
    ;; The snapshot URL is a string, or #f when the JSON value is null.
    (snapshot-url visit-snapshot-url "snapshot_url" string*)
    ;; The status is converted to a symbol: 'full | 'partial | 'ongoing.
    (status       visit-status       "status" string->symbol)
    (number       visit-number       "visit"))
  259. ;; <https://archive.softwareheritage.org/api/1/snapshot/4334c3ed4bb208604ed780d8687fe523837f1bd1/>
  ;; Record for a snapshot.  Its "branches" value is turned into a list of
  ;; <branch> records by 'json->branches'.
  (define-json-mapping <snapshot> make-snapshot snapshot?
    json->snapshot
    (id       snapshot-id       "id")
    (branches snapshot-branches "branches" json->branches))
  ;; Element of the "branches" field of snapshots: a branch name together with
  ;; the type and the URL of the object it points to.
  (define-record-type <branch>
    (make-branch name target-type target-url)
    branch?
    (name        branch-name)
    (target-type branch-target-type)                ;release | revision
    (target-url  branch-target-url))
  (define (json->branches branches)
    ;; Convert BRANCHES, an alist mapping branch names to alists that have
    ;; "target_type" and "target_url" keys, to a list of <branch> records.
    (define (entry->branch entry)
      (match entry
        ((name . properties)
         (let ((type (string->symbol (assoc-ref properties "target_type")))
               (url  (assoc-ref properties "target_url")))
           (make-branch name type url)))))

    (map entry->branch branches))
  279. ;; <https://archive.softwareheritage.org/api/1/release/1f44934fb6e2cefccbecd4fa347025349fa9ff76/>
  ;; Record for a release.  The type of the object it points to is converted
  ;; to a symbol and stored along with the URL of that object.
  (define-json-mapping <release> make-release release?
    json->release
    (id          release-id          "id")
    (name        release-name        "name")
    (message     release-message     "message")
    (target-type release-target-type "target_type" string->symbol)
    (target-url  release-target-url  "target_url"))
  287. ;; <https://archive.softwareheritage.org/api/1/revision/359fdda40f754bbf1b5dc261e7427b75463b59be/>
  288. ;; Note: Some revisions, such as those for "nixguix" origins (e.g.,
  289. ;; <https://archive.softwareheritage.org/api/1/revision/b8dbc65475bbedde8e015d4730ade8864c38fad3/>),
  290. ;; have their 'date' field set to null.
  ;; Record for a revision.  The date is a SRFI-19 date, or #f when the JSON
  ;; value is null.
  (define-json-mapping <revision> make-revision revision?
    json->revision
    (id            revision-id            "id")
    (date          revision-date          "date" (maybe-null string->date*))
    (directory     revision-directory     "directory")
    (directory-url revision-directory-url "directory_url"))
  297. ;; <https://archive.softwareheritage.org/api/1/content/>
  ;; Record for a content object (a file).  Checksums are converted by
  ;; 'json->checksums'; the other fields are kept as found in the JSON object.
  (define-json-mapping <content> make-content content?
    json->content
    (checksums     content-checksums     "checksums" json->checksums)
    (data-url      content-data-url      "data_url")
    (file-type-url content-file-type-url "filetype_url")
    (language-url  content-language-url  "language_url")
    (length        content-length        "length")
    (license-url   content-license-url   "license_url"))
  (define (json->checksums checksums)
    ;; Return CHECKSUMS, an alist whose values are base16 strings, with each
    ;; value converted to a bytevector.
    (define (decode-entry entry)
      (match entry
        ((algorithm . hex)
         (cons algorithm (base16-string->bytevector hex)))))

    (map decode-entry checksums))
  311. ;; <https://archive.softwareheritage.org/api/1/directory/27c69c5d298a43096a53affbf881e7b13f17bdcd/>
  ;; Record for one entry of a directory listing.
  (define-json-mapping <directory-entry> make-directory-entry directory-entry?
    json->directory-entry
    (name directory-entry-name "name")
    ;; The type is a symbol; "dir" is spelled out as 'directory.
    (type directory-entry-type "type"
          (lambda (str)
            (if (equal? str "dir")
                'directory
                (string->symbol str))))
    ;; Checksums may be missing (#f or unspecified), in which case the field
    ;; is #f.
    (checksums directory-entry-checksums "checksums"
               (lambda (obj)
                 (and obj
                      (not (unspecified? obj))
                      (json->checksums obj))))
    (id          directory-entry-id          "dir_id")
    (length      directory-entry-length      "length")
    (permissions directory-entry-permissions "perms")
    (target-url  directory-entry-target-url  "target_url"))
  328. ;; <https://archive.softwareheritage.org/api/1/origin/save/>
  ;; Record for the reply to a "save origin" request or status query.  The
  ;; request date is parsed with 'string->date*' and both statuses are
  ;; converted to symbols.
  (define-json-mapping <save-reply> make-save-reply save-reply?
    json->save-reply
    (origin-url     save-reply-origin-url     "origin_url")
    (origin-type    save-reply-origin-type    "origin_type")
    (request-date   save-reply-request-date   "save_request_date"
                    string->date*)
    (request-status save-reply-request-status "save_request_status"
                    string->symbol)
    (task-status    save-reply-task-status    "save_task_status"
                    (lambda (status)
                      (match status
                        ;; Avoid a symbol that contains a space.
                        ("not created" 'not-created)
                        ((? string?) (string->symbol status))))))
  341. ;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
  ;; Record for a reply of the vault.  The status is converted to a symbol;
  ;; 'vault-fetch' dispatches on 'done, 'failed, 'new, and 'pending.
  (define-json-mapping <vault-reply> make-vault-reply vault-reply?
    json->vault-reply
    (id               vault-reply-id               "id")
    (fetch-url        vault-reply-fetch-url        "fetch_url")
    (progress-message vault-reply-progress-message "progress_message")
    (status           vault-reply-status           "status" string->symbol)
    (swhid            vault-reply-swhid            "swhid"))
  349. ;;;
  350. ;;; RPCs.
  351. ;;;
  ;; Queries "/api/1/origin/URL/get".
  (define-query (lookup-origin url)
    "Look up URL, the URL of a code repository, and return the corresponding
  <origin> record."
    (path "/api/1/origin" url "get")
    json->origin)
  ;; Queries "/api/1/content/TYPE:HEX", where HEX is HASH in base16.
  (define-query (lookup-content hash type)
    "Return the <content> record of the object whose hash of the given
  TYPE--e.g., \"sha256\"--is HASH, a bytevector."
    (path "/api/1/content"
          (string-append type ":"
                         (bytevector->base16-string hash)))
    json->content)
  ;; Queries "/api/1/revision/ID".
  (define-query (lookup-revision id)
    "Return the <revision> record for ID, which is typically the SHA1 of a Git
  commit."
    (path "/api/1/revision" id)
    json->revision)
  ;; Queries "/api/1/directory/ID".
  (define-query (lookup-directory id)
    "Return the entries of the directory with the given ID, as a list of
  <directory-entry> records."
    (path "/api/1/directory" id)
    json->directory-entries)
  (define (json->directory-entries port)
    ;; Read a JSON array from PORT and return the list of <directory-entry>
    ;; records corresponding to its elements.
    (let ((entries (vector->list (json->scm port))))
      (map json->directory-entry entries)))
  (define (origin-visits origin)
    "Return the visits of ORIGIN, a record as returned by 'lookup-origin', as a
  list of <visit> records."
    (define (read-visits port)
      ;; The body is a JSON array with one object per visit.
      (map json->visit (vector->list (json->scm port))))

    (call (swh-url (origin-visits-url origin)) read-visits))
  (define (visit-snapshot visit)
    "Return the <snapshot> that VISIT refers to, or #f if VISIT has no
  snapshot."
    (and=> (visit-snapshot-url visit)
           (lambda (url)
             (call (swh-url url) json->snapshot))))
  (define (snapshot-url snapshot branch-count first-branch)
    "Return the URL at which to query SNAPSHOT for BRANCH-COUNT branches, the
  first one being FIRST-BRANCH."
    (let ((base  (swh-url "/api/1/snapshot" (snapshot-id snapshot)))
          (count (number->string branch-count))
          (from  (uri-encode first-branch)))
      (string-append base
                     "?branches_count=" count
                     "&branches_from=" from)))
  (define (lookup-snapshot-branch snapshot name)
    "Return the branch of SNAPSHOT called NAME, or #f if SNAPSHOT has no such
  branch."
    (define (named? branch)
      (string=? (branch-name branch) name))

    (or (find named? (snapshot-branches snapshot))

        ;; There's no API entry point to look up a snapshot branch by name.
        ;; Work around that by using the paginated list of branches provided
        ;; by the /api/1/snapshot API: ask for one branch, and start
        ;; pagination at NAME.
        (let ((page (call (snapshot-url snapshot 1 name)
                          json->snapshot)))
          (match (snapshot-branches page)
            ((branch)
             (and (named? branch) branch))
            (_ #f)))))
  (define (branch-target branch)
    "Fetch and return the object BRANCH points to: a <release> when its target
  type is 'release, a <revision> when it is 'revision."
    (let ((decode (match (branch-target-type branch)
                    ('release  json->release)
                    ('revision json->revision))))
      (call (swh-url (branch-target-url branch)) decode)))
  (define (lookup-origin-revision url tag)
    "Return a <revision> corresponding to the given TAG for the repository
  coming from URL.  Example:

    (lookup-origin-revision \"https://github.com/guix-mirror/guix/\" \"v0.8\")
    => #<<revision> id: \"44941…\" …>

  The information is based on the first visit of URL, in the order returned by
  'origin-visits', that has status 'full and a snapshot.  Return #f if URL, a
  suitable visit, its snapshot, or TAG could not be found."
    (define (usable-visit? visit)
      ;; True if VISIT is complete and (visit-snapshot VISIT) has a URL to
      ;; query.
      (and (visit-snapshot-url visit)
           (eq? 'full (visit-status visit))))

    (define (tag-branch snapshot)
      ;; Return the branch of SNAPSHOT that corresponds to TAG, or #f.  Use
      ;; 'lookup-snapshot-branch' rather than searching (snapshot-branches
      ;; SNAPSHOT) directly: the list of branches is paginated, and
      ;; 'lookup-snapshot-branch' queries the archive when the branch is not
      ;; in the list at hand.
      (or (lookup-snapshot-branch snapshot
                                  (string-append "refs/tags/" tag)) ;Git
          (lookup-snapshot-branch snapshot tag)))                   ;Hg

    (match (lookup-origin url)
      (#f #f)
      (origin
       ;; 'origin-visits' and 'visit-snapshot' return #f upon 404 responses.
       (match (find usable-visit? (or (origin-visits origin) '()))
         (#f #f)
         (visit
          (match (and=> (and=> (visit-snapshot visit) tag-branch)
                        branch-target)
            ((? release? release)
             (release-target release))
            ((? revision? revision)
             revision)
            (#f                                     ;tag not found
             #f)))))))
  (define (release-target release)
    "Fetch and return the <revision> that RELEASE points to.  Only releases
  whose target type is 'revision are handled."
    (let ((type (release-target-type release)))
      (match type
        ('revision
         (let ((url (swh-url (release-target-url release))))
           (call url json->revision))))))
  (define (directory-entry-target entry)
    "Fetch and return the object ENTRY, a directory entry, points to: a list of
  directory entries if ENTRY has type 'directory, a <content> object if it has
  type 'file."
    (let ((decode (match (directory-entry-type entry)
                    ('directory json->directory-entries)
                    ('file      json->content))))
      (call (swh-url (directory-entry-target-url entry)) decode)))
  (define* (save-origin url #:optional (type "git"))
    "Ask Software Heritage to save the repository of the given TYPE found at
  URL, by means of a POST request.  Return a <save-reply>."
    (let ((endpoint (swh-url "/api/1/origin/save" type "url" url)))
      (call endpoint json->save-reply http-post*)))
  ;; Same endpoint as 'save-origin', queried with the default GET method.
  (define-query (save-origin-status url type)
    "Return the <save-reply> describing the state of a /save request made for
  URL and TYPE (e.g., \"git\")."
    (path "/api/1/origin/save" type "url" url)
    json->save-reply)
  (define* (vault-url id kind #:optional (archive-type 'flat))
    "Return the URL used to query the vault about, or request the cooking of,
  the object designated by ID and KIND, as an archive of type ARCHIVE-TYPE.
  Normally, ID is an SWHID and KIND is #f; the deprecated convention is to set
  ID to a raw directory or revision ID and KIND to 'revision or 'directory."
    ;; Note: /api/1/vault/directory/ID was deprecated in favor of
    ;; /api/1/vault/flat/SWHID; this procedure "converts" automatically by
    ;; prepending the right SWHID prefix to raw identifiers.
    (let* ((prefix (match kind
                     ('directory "swh:1:dir:")
                     ('revision  "swh:1:rev:")
                     (#f         #f)))
           (swhid  (if prefix
                       (string-append prefix id)
                       id)))
      (swh-url "/api/1/vault" (symbol->string archive-type) swhid)))
  (define* (query-vault id #:optional kind #:key (archive-type 'flat))
    "Ask the vault whether object ID, an SWHID, is available.  Return a
  <vault-reply> on success, and #f if the object could not be found.
  ARCHIVE-TYPE is 'flat for a tarball containing a directory, or 'git-bare for a
  tarball containing a bare Git repository corresponding to a revision.

  Passing KIND (one of 'directory or 'revision) together with a raw revision or
  directory identifier is deprecated."
    (let ((url (vault-url id kind archive-type)))
      (call url json->vault-reply)))
  (define* (request-cooking id #:optional kind #:key (archive-type 'flat))
    "Ask the vault, by means of a POST request, to cook object ID, an SWHID.
  Return a <vault-reply>.  ARCHIVE-TYPE is 'flat for a tarball containing a
  directory, or 'git-bare for a tarball containing a bare Git repository
  corresponding to a revision.

  Passing KIND (one of 'directory or 'revision) together with a raw revision or
  directory identifier is deprecated."
    (let ((url (vault-url id kind archive-type)))
      (call url json->vault-reply http-post*)))
  (define* (vault-fetch id
                        #:optional kind
                        #:key
                        (archive-type 'flat)
                        (log-port (current-error-port)))
    "Return an input port from which a bundle of the object with the given ID,
  an SWHID, can be read, or #f if the object could not be found.  Progress
  messages are written to LOG-PORT.

  ARCHIVE-TYPE can be 'flat for a tarball containing a directory, or 'git-bare
  for a tarball containing a bare Git repository corresponding to a revision."
    (define (query)
      (query-vault id kind #:archive-type archive-type))

    (define (cook)
      (request-cooking id kind #:archive-type archive-type))

    (define (fetch-bundle reply)
      ;; Return a port on the bundle REPLY points to, or #f if the server does
      ;; not answer with code 200.
      (let-values (((response port)
                    (http-get* (swh-url (vault-reply-fetch-url reply))
                               #:streaming? #t
                               #:verify-certificate?
                               (%verify-swh-certificate?))))
        (cond ((= (response-code response) 200)
               port)
              (else                                 ;shouldn't happen
               (close-port port)
               #f))))

    (let poll ((reply (query)))
      (if (not reply)
          ;; The vault knows nothing about this object: request its cooking.
          (and=> (cook) poll)
          (match (vault-reply-status reply)
            ('done
             (fetch-bundle reply))
            ('failed
             ;; Upon failure, we're supposed to try again.
             (format log-port "SWH vault: failure: ~a~%"
                     (vault-reply-progress-message reply))
             (format log-port "SWH vault: retrying...~%")
             (poll (cook)))
            ((and (or 'new 'pending) status)
             ;; Wait until the bundle shows up.
             (let ((message (vault-reply-progress-message reply))
                   (new?    (eq? 'new status)))
               (when new?
                 (format log-port "SWH vault: requested bundle cooking, waiting for completion...~%"))
               (when (string? message)
                 (format log-port "SWH vault: ~a~%" message))

               ;; Wait long enough so we don't exhaust our maximum number of
               ;; requests per hour too fast (as of this writing, the limit is
               ;; 60 requests per hour per IP address.)
               (sleep (if new? 60 30))

               (poll (query))))))))
  553. ;;;
  554. ;;; High-level interface.
  555. ;;;
  (define (call-with-temporary-directory proc)      ;FIXME: factorize
    "Create a temporary directory under $TMPDIR, or under /tmp when TMPDIR is
  unset, and call PROC with its name.  Delete the directory and its contents
  when leaving the dynamic extent of this call; errors raised while deleting are
  ignored."
    (let* ((parent  (or (getenv "TMPDIR") "/tmp"))
           (tmp-dir (mkdtemp! (string-append parent
                                             "/guix-directory.XXXXXX"))))
      (define (cleanup)
        (false-if-exception (delete-file-recursively tmp-dir)))

      (dynamic-wind
        (const #t)
        (lambda () (proc tmp-dir))
        cleanup)))
  (define* (swh-download-directory id output
                                   #:key (log-port (current-error-port)))
    "Download from Software Heritage the directory with the given ID, and
  unpack it to OUTPUT.  Return #t on success and #f if the directory could not
  be fetched from the vault; messages are written to LOG-PORT.  An error is
  raised if 'tar' fails to extract the bundle."
    (define (extract input directory)
      ;; Feed INPUT, a port on a gzip-compressed tarball, to 'tar' so that it
      ;; is extracted to DIRECTORY, then close INPUT.
      (let ((tar (open-pipe* OPEN_WRITE "tar" "-C" directory "-xzvf" "-")))
        (dump-port input tar)
        (close-port input)
        (let ((status (close-pipe tar)))
          (unless (zero? status)
            (error "tar extraction failure" status)))))

    (call-with-temporary-directory
     (lambda (directory)
       (match (vault-fetch id 'directory #:log-port log-port)
         (#f
          (format log-port
                  "SWH: directory ~a could not be fetched from the vault~%"
                  id)
          #f)
         ((? port? input)
          (extract input directory)

          ;; The bundle is expected to contain a single top-level directory;
          ;; copy its contents to OUTPUT.
          (match (scandir directory)
            (("." ".." sub-directory)
             (copy-recursively (string-append directory "/" sub-directory)
                               output
                               #:log (%make-void-port "w"))
             #t)))))))
  (define (commit-id? reference)
    "Return true if REFERENCE, a string, looks like a commit ID, meaning that it
  consists of exactly 40 hexadecimal digits, and false otherwise---e.g., if it
  is a tag name.  This is based on a simple heuristic so use with care!"
    (let ((len (string-length reference)))
      (and (= 40 len)
           (string-every char-set:hex-digit reference))))
  (define* (swh-download url reference output
                         #:key (log-port (current-error-port)))
    "Download from Software Heritage a checkout of the Git tag or commit
  REFERENCE originating from URL, and unpack it in OUTPUT.  Return #t on success
  and #f on failure; messages are written to LOG-PORT.

  This procedure uses the \"vault\", which contains \"cooked\" directories in
  the form of tarballs.  If the requested directory is not cooked yet, it will
  wait until it becomes available, which could take several minutes."
    (let ((revision (if (commit-id? reference)
                        ;; REFERENCE looks like a commit ID: look it up
                        ;; directly, regardless of URL.
                        (lookup-revision reference)
                        (lookup-origin-revision url reference))))
      (match revision
        (#f
         (format log-port
                 "SWH: revision ~s originating from ~a could not be found~%"
                 reference url)
         #f)
        ((? revision?)
         (format log-port "SWH: found revision ~a with directory at '~a'~%"
                 (revision-id revision)
                 (swh-url (revision-directory-url revision)))
         (swh-download-directory (revision-directory revision) output
                                 #:log-port log-port)))))