swh.scm 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
  4. ;;; Copyright © 2021 Xinglu Chen <public@yoctocell.xyz>
  5. ;;; Copyright © 2021 Simon Tournier <zimon.toutoune@gmail.com>
  6. ;;;
  7. ;;; This file is part of GNU Guix.
  8. ;;;
  9. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  10. ;;; under the terms of the GNU General Public License as published by
  11. ;;; the Free Software Foundation; either version 3 of the License, or (at
  12. ;;; your option) any later version.
  13. ;;;
  14. ;;; GNU Guix is distributed in the hope that it will be useful, but
  15. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. ;;; GNU General Public License for more details.
  18. ;;;
  19. ;;; You should have received a copy of the GNU General Public License
  20. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  21. (define-module (guix swh)
  22. #:use-module (guix base16)
  23. #:use-module (guix build utils)
  24. #:use-module ((guix build syscalls) #:select (mkdtemp!))
  25. #:use-module (web uri)
  26. #:use-module (web client)
  27. #:use-module (web response)
  28. #:use-module (json)
  29. #:use-module (srfi srfi-1)
  30. #:use-module (srfi srfi-9)
  31. #:use-module (srfi srfi-11)
  32. #:use-module (srfi srfi-19)
  33. #:use-module (ice-9 match)
  34. #:use-module (ice-9 regex)
  35. #:use-module (ice-9 popen)
  36. #:use-module ((ice-9 ftw) #:select (scandir))
  37. #:export (%swh-base-url
  38. %verify-swh-certificate?
  39. %allow-request?
  40. request-rate-limit-reached?
  41. origin?
  42. origin-type
  43. origin-url
  44. origin-visits
  45. lookup-origin
  46. visit?
  47. visit-date
  48. visit-origin
  49. visit-url
  50. visit-snapshot-url
  51. visit-status
  52. visit-number
  53. visit-snapshot
  54. snapshot?
  55. snapshot-id
  56. snapshot-branches
  57. lookup-snapshot-branch
  58. branch?
  59. branch-name
  60. branch-target
  61. release?
  62. release-id
  63. release-name
  64. release-message
  65. release-target
  66. revision?
  67. revision-id
  68. revision-date
  69. revision-directory
  70. lookup-revision
  71. lookup-origin-revision
  72. content?
  73. content-checksums
  74. content-data-url
  75. content-length
  76. lookup-content
  77. directory-entry?
  78. directory-entry-name
  79. directory-entry-type
  80. directory-entry-checksums
  81. directory-entry-length
  82. directory-entry-permissions
  83. lookup-directory
  84. directory-entry-target
  85. save-reply?
  86. save-reply-origin-url
  87. save-reply-origin-type
  88. save-reply-request-date
  89. save-reply-request-status
  90. save-reply-task-status
  91. save-origin
  92. save-origin-status
  93. vault-reply?
  94. vault-reply-id
  95. vault-reply-fetch-url
  96. vault-reply-progress-message
  97. vault-reply-status
  98. vault-reply-swhid
  99. query-vault
  100. request-cooking
  101. vault-fetch
  102. commit-id?
  103. swh-download-directory
  104. swh-download))
  105. ;;; Commentary:
  106. ;;;
  107. ;;; This module provides bindings to the HTTP interface of Software Heritage.
  108. ;;; It allows you to browse the archive, look up revisions (such as SHA1
  109. ;;; commit IDs), "origins" (code hosting URLs), content (files), etc. See
  110. ;;; <https://archive.softwareheritage.org/api/> for more information.
  111. ;;;
  112. ;;; The high-level 'swh-download' procedure allows you to download a Git
  113. ;;; revision from Software Heritage, provided it is available.
  114. ;;;
  115. ;;; Code:
  (define %swh-base-url
    ;; Root URL of the Software Heritage archive; 'swh-url' prepends it to
    ;; paths that start with "/".  Presumably it never needs to change.
    (make-parameter "https://archive.softwareheritage.org"))

  (define %verify-swh-certificate?
    ;; When true, the X.509 HTTPS certificate for %SWH-BASE-URL is verified.
    (make-parameter #t))

  ;; Token of an account on the Software Heritage Authentication service
  ;; <https://archive.softwareheritage.org/api/>.  It is read from the
  ;; 'GUIX_SWH_TOKEN' environment variable and kept as a symbol; the value is
  ;; #f when that variable is unset.
  (define %swh-token
    (make-parameter (let ((token (getenv "GUIX_SWH_TOKEN")))
                      (and token (string->symbol token)))))
  (define (swh-url path . rest)
    ;; URLs returned by the API may be relative or absolute, and this has
    ;; changed without notice before.  A PATH that starts with "/" is taken to
    ;; be relative to %SWH-BASE-URL; anything else is used as is.  The
    ;; components in REST are appended, each preceded by "/".
    (let* ((prefix (if (string-prefix? "/" path)
                       (%swh-base-url)
                       ""))
           (url    (string-append prefix path
                                  (string-join rest "/" 'prefix))))
      ;; Ensure there's a trailing slash or we get a redirect.
      (if (string-suffix? "/" url)
          url
          (string-append url "/"))))
  ;; XXX: These wrappers around 'http-request' work around a bug in Guile
  ;; 3.0.2 where #:verify-certificate? would be ignored
  ;; (<https://bugs.gnu.org/40486>).  All arguments after URI are passed on
  ;; unchanged.
  (define (http-get* uri . rest)
    (apply http-request uri #:method 'GET rest))

  (define (http-post* uri . rest)
    (apply http-request uri #:method 'POST rest))
  (define %date-regexp
    ;; Match strings like "2014-11-17T22:09:38+01:00" or
    ;; "2018-09-30T23:20:07.815449+00:00".  Groups 1 to 6 are the date and time
    ;; fields, group 8 is the optional fraction (with its leading dot), and
    ;; groups 9 and 10 are the signed hours and the minutes of the time zone.
    (make-regexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})((\\.[0-9]+)?)([+-][0-9]{2}):([0-9]{2})$"))

  (define (string->date* str)
    "Return a SRFI-19 date parsed from STR, a date string as returned by
  Software Heritage.  When STR does not have the expected format, return STR
  unchanged."
    ;; We can't use 'string->date' because of the timezone format: SWH returns
    ;; "+01:00" when the '~z' template expects "+0100".  So we roll our own!
    (define (parse m)
      (define (ref n)
        (string->number (match:substring m n)))

      (define nanoseconds
        ;; The fraction is a decimal part of a second (".815449" means
        ;; 815,449 microseconds), so pad or truncate its digits to nine places
        ;; to obtain nanoseconds.
        (let ((fraction (match:substring m 8)))
          (if fraction
              (string->number
               (string-pad-right (string-drop fraction 1) 9 #\0))
              0)))

      (define zone-offset
        ;; Offset from UTC in seconds.  The sign is read from the text rather
        ;; than from the parsed hours so that "-00:30" stays negative.
        (let ((sign (if (string-prefix? "-" (match:substring m 9)) -1 1)))
          (* sign
             (+ (* 3600 (abs (ref 9)))
                (* 60 (ref 10))))))

      (make-date nanoseconds
                 (ref 6) (ref 5) (ref 4)
                 (ref 3) (ref 2) (ref 1)
                 zone-offset))

    (or (and=> (regexp-exec %date-regexp str) parse)
        str))                                       ;oops!
  (define (maybe-null proc)
    "Return a one-argument procedure that returns #f when passed the empty list
  or the symbol 'null, and the result of applying PROC to its argument
  otherwise."
    (lambda (obj)
      (if (or (null? obj) (eq? obj 'null))
          #f
          (proc obj))))
  (define (string* obj)
    ;; Convert the "string or null" values coming from JSON to "string or #f".
    ;; Null shows up as the empty list with Guile-JSON 3.x and as the symbol
    ;; 'null with Guile-JSON 4.x.  Any other value is a match error.
    (match obj
      ((or (? null?) 'null) #f)
      ((? string? str) str)))
  (define %allow-request?
    ;; Procedure that takes a URL and a method (e.g., the 'http-get'
    ;; procedure) and returns true to keep going.  This can be used to
    ;; disallow requests when 'request-rate-limit-reached?' returns true, for
    ;; instance.  The default allows every request.
    (make-parameter (lambda _ #t)))

  ;; The times, in seconds since the Epoch, at which the rate limit for
  ;; "/origin/save" POST requests and that of all other requests will be
  ;; reset.  See <https://archive.softwareheritage.org/api/#rate-limiting>.
  (define %save-rate-limit-reset-time 0)
  (define %general-rate-limit-reset-time 0)
  (define (request-rate-limit-reached? url method)
    "Return true if the rate limit that applies to a METHOD request to URL has
  been reached and its reset time has not passed yet."
    (let* ((uri   (string->uri url))
           ;; POST requests to "/origin/save" have a rate limit of their own.
           (save? (and (eq? method http-post*)
                       (string-prefix? "/api/1/origin/save/"
                                       (uri-path uri))))
           (reset (if save?
                      %save-rate-limit-reset-time
                      %general-rate-limit-reset-time)))
      (< (car (gettimeofday)) reset)))
  (define (update-rate-limit-reset-time! url method response)
    "Update the rate limit reset time for URL and METHOD based on the headers in
  RESPONSE.  Do nothing and return #f when RESPONSE lacks a numeric
  'X-RateLimit-Reset' header."
    (let ((uri (string->uri url)))
      (match (assq-ref (response-headers response) 'x-ratelimit-reset)
        ;; Check for a string first: 'assq-ref' returns #f when the header is
        ;; missing, and 'string->number' must not be applied to #f.
        ((? string? (= string->number (? number? reset)))
         (if (and (eq? method http-post*)
                  (string-prefix? "/api/1/origin/save/" (uri-path uri)))
             (set! %save-rate-limit-reset-time reset)
             (set! %general-rate-limit-reset-time reset)))
        (_
         #f))))
  (define* (call url decode #:optional (method http-get*)
                 #:key (false-if-404? #t))
    "Invoke the endpoint at URL using METHOD.  Decode the resulting JSON body
  using DECODE, a one-argument procedure that takes an input port.  When
  FALSE-IF-404? is true, return #f upon 404 responses.

  Return #f without making a request when the '%allow-request?' procedure
  returns false for URL and METHOD.  Throw to 'swh-error with URL, METHOD, and
  the response when the rate limit is exhausted or when the response code is
  neither 200 nor an accepted 404."
    (define (rate-limit-exhausted? response)
      ;; True if RESPONSE carries a numeric 'X-RateLimit-Remaining' header
      ;; equal to zero.  A missing or non-numeric header counts as "not
      ;; exhausted" rather than raising a type error.
      (let* ((remaining (assq-ref (response-headers response)
                                  'x-ratelimit-remaining))
             (count     (and (string? remaining)
                             (string->number remaining))))
        (and count (zero? count))))

    (and ((%allow-request?) url method)
         (let*-values (((response port)
                        (method url #:streaming? #t
                                #:headers
                                (if (%swh-token)
                                    `((authorization . (Bearer ,(%swh-token))))
                                    '())
                                #:verify-certificate?
                                (%verify-swh-certificate?))))
           ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
           (when (rate-limit-exhausted? response)
             (update-rate-limit-reset-time! url method response)
             ;; Close the body port before leaving, like the other exits do.
             (close-port port)
             (throw 'swh-error url method response))

           (cond ((= 200 (response-code response))
                  (let ((result (decode port)))
                    (close-port port)
                    result))
                 ((and false-if-404?
                       (= 404 (response-code response)))
                  (close-port port)
                  #f)
                 (else
                  (close-port port)
                  (throw 'swh-error url method response))))))
  (define-syntax define-query
    ;; Usage: (define-query (NAME ARGS ...) DOCSTRING (path COMPONENTS ...)
    ;; DECODE).  COMPONENTS are passed to 'swh-url' to build the URL, and
    ;; DECODE is the procedure 'call' uses to decode the response body.
    (syntax-rules (path)
      "Define a procedure that performs a Software Heritage query."
      ((_ (proc formals ...) doc (path parts ...)
          decode)
       (define (proc formals ...)
         doc
         (call (swh-url parts ...) decode)))))
  ;; <https://archive.softwareheritage.org/api/1/origin/https://github.com/guix-mirror/guix/get>
  ;; Record for an "origin"; the JSON key of each field is spelled out.
  (define-json-mapping <origin> make-origin origin?
    json->origin
    (visits-url origin-visits-url "origin_visits_url")
    (type       origin-type       "type")
    (url        origin-url        "url"))
  ;; <https://archive.softwareheritage.org/api/1/origin/52181937/visits/>
  ;; Record for one visit of an origin; the JSON key of each field is spelled
  ;; out.
  (define-json-mapping <visit> make-visit visit?
    json->visit
    (date         visit-date         "date" string->date*)
    (origin       visit-origin       "origin")
    (url          visit-url          "origin_visit_url")
    (snapshot-url visit-snapshot-url "snapshot_url" string*) ;string | #f
    (status       visit-status       "status" string->symbol) ;'full | 'partial | 'ongoing
    (number       visit-number       "visit"))
  ;; <https://archive.softwareheritage.org/api/1/snapshot/4334c3ed4bb208604ed780d8687fe523837f1bd1/>
  ;; Record for a snapshot; its "branches" are converted to a list of
  ;; <branch> records by 'json->branches'.
  (define-json-mapping <snapshot> make-snapshot snapshot?
    json->snapshot
    (id       snapshot-id       "id")
    (branches snapshot-branches "branches" json->branches))
  ;; Record for the elements of the "branches" field of snapshots.
  (define-record-type <branch>
    (make-branch name type url)
    branch?
    (name branch-name)
    (type branch-target-type)                       ;release | revision
    (url  branch-target-url))

  (define (json->branches branches)
    "Convert BRANCHES, an alist that maps branch names to alists with
  \"target_type\" and \"target_url\" entries, to a list of <branch> records."
    (map (lambda (entry)
           (match entry
             ((name . properties)
              (let ((type (assoc-ref properties "target_type"))
                    (url  (assoc-ref properties "target_url")))
                (make-branch name (string->symbol type) url)))))
         branches))
  ;; <https://archive.softwareheritage.org/api/1/release/1f44934fb6e2cefccbecd4fa347025349fa9ff76/>
  ;; Record for a release; the target type is converted to a symbol.
  (define-json-mapping <release> make-release release?
    json->release
    (id          release-id          "id")
    (name        release-name        "name")
    (message     release-message     "message")
    (target-type release-target-type "target_type" string->symbol)
    (target-url  release-target-url  "target_url"))
  ;; <https://archive.softwareheritage.org/api/1/revision/359fdda40f754bbf1b5dc261e7427b75463b59be/>
  ;; Note: Some revisions, such as those for "nixguix" origins (e.g.,
  ;; <https://archive.softwareheritage.org/api/1/revision/b8dbc65475bbedde8e015d4730ade8864c38fad3/>),
  ;; have their 'date' field set to null, hence the use of 'maybe-null'.
  (define-json-mapping <revision> make-revision revision?
    json->revision
    (id            revision-id            "id")
    (date          revision-date          "date" (maybe-null string->date*))
    (directory     revision-directory     "directory")
    (directory-url revision-directory-url "directory_url"))
  ;; <https://archive.softwareheritage.org/api/1/content/>
  ;; Record for a content (file); its checksums are converted by
  ;; 'json->checksums'.
  (define-json-mapping <content> make-content content?
    json->content
    (checksums     content-checksums     "checksums" json->checksums)
    (data-url      content-data-url      "data_url")
    (file-type-url content-file-type-url "filetype_url")
    (language-url  content-language-url  "language_url")
    (length        content-length        "length")
    (license-url   content-license-url   "license_url"))
  (define (json->checksums checksums)
    "Convert CHECKSUMS, an alist that maps keys to base16-encoded strings, to
  an alist that maps the same keys to bytevectors."
    (map (lambda (entry)
           (match entry
             ((algorithm . hex)
              (cons algorithm (base16-string->bytevector hex)))))
         checksums))
  ;; <https://archive.softwareheritage.org/api/1/directory/27c69c5d298a43096a53affbf881e7b13f17bdcd/>
  ;; Record for one entry of a directory listing.
  (define-json-mapping <directory-entry> make-directory-entry directory-entry?
    json->directory-entry
    (name        directory-entry-name "name")
    (type        directory-entry-type "type"
                 ;; "dir" becomes 'directory; other strings become the symbol
                 ;; of the same name.
                 (lambda (str)
                   (if (equal? str "dir")
                       'directory
                       (string->symbol str))))
    (checksums   directory-entry-checksums "checksums"
                 ;; The checksums may be #f or unspecified, in which case the
                 ;; field is #f.
                 (lambda (obj)
                   (if (or (not obj) (unspecified? obj))
                       #f
                       (json->checksums obj))))
    (id          directory-entry-id "dir_id")
    (length      directory-entry-length "length")
    (permissions directory-entry-permissions "perms")
    (target-url  directory-entry-target-url "target_url"))
  ;; <https://archive.softwareheritage.org/api/1/origin/save/>
  ;; Record for the reply to a "save" request.  Both statuses are converted to
  ;; symbols; "not created" becomes 'not-created.
  (define-json-mapping <save-reply> make-save-reply save-reply?
    json->save-reply
    (origin-url     save-reply-origin-url     "origin_url")
    (origin-type    save-reply-origin-type    "origin_type")
    (request-date   save-reply-request-date   "save_request_date"
                    string->date*)
    (request-status save-reply-request-status "save_request_status"
                    string->symbol)
    (task-status    save-reply-task-status    "save_task_status"
                    (match-lambda
                      ("not created"    'not-created)
                      ((? string? name) (string->symbol name)))))
  ;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
  ;; Record for a reply of the vault; the status is converted to a symbol.
  (define-json-mapping <vault-reply> make-vault-reply vault-reply?
    json->vault-reply
    (id               vault-reply-id               "id")
    (fetch-url        vault-reply-fetch-url        "fetch_url")
    (progress-message vault-reply-progress-message "progress_message")
    (status           vault-reply-status           "status" string->symbol)
    (swhid            vault-reply-swhid            "swhid"))
  359. ;;;
  360. ;;; RPCs.
  361. ;;;
  ;; Query /api/1/origin/URL/get and decode the reply as an <origin>.
  (define-query (lookup-origin url)
    "Return an origin for URL."
    (path "/api/1/origin" url "get")
    json->origin)
  ;; Query /api/1/content/TYPE:HEX, where HEX is the base16 encoding of HASH,
  ;; and decode the reply as a <content>.
  (define-query (lookup-content hash type)
    "Return a content for HASH, of the given TYPE--e.g., \"sha256\"."
    (path "/api/1/content"
          (string-join (list type (bytevector->base16-string hash))
                       ":"))
    json->content)
  ;; Query /api/1/revision/ID and decode the reply as a <revision>.
  (define-query (lookup-revision id)
    "Return the revision with the given ID, typically a Git commit SHA1."
    (path "/api/1/revision" id)
    json->revision)
  ;; Query /api/1/directory/ID and decode the reply as a list of
  ;; <directory-entry> records.
  (define-query (lookup-directory id)
    "Return the directory with the given ID."
    (path "/api/1/directory" id)
    json->directory-entries)
  (define (json->directory-entries port)
    "Read a JSON array from PORT and return the corresponding list of
  <directory-entry> records."
    (let ((entries (vector->list (json->scm port))))
      (map json->directory-entry entries)))
  (define (origin-visits origin)
    "Return the list of visits of ORIGIN, a record as returned by
  'lookup-origin'."
    (define (port->visits port)
      ;; The reply is a JSON array with one element per visit.
      (map json->visit (vector->list (json->scm port))))

    (call (swh-url (origin-visits-url origin))
          port->visits))
  (define (visit-snapshot visit)
    "Return the snapshot corresponding to VISIT or #f if no snapshot is
  available."
    (let ((url (visit-snapshot-url visit)))
      (and url
           (call (swh-url url) json->snapshot))))
  (define (snapshot-url snapshot branch-count first-branch)
    "Return the URL of SNAPSHOT such that it contains information for
  BRANCH-COUNT branches, starting at FIRST-BRANCH."
    (let ((base  (swh-url "/api/1/snapshot" (snapshot-id snapshot)))
          (query (string-append "?branches_count="
                                (number->string branch-count)
                                "&branches_from="
                                (uri-encode first-branch))))
      (string-append base query)))
  (define (lookup-snapshot-branch snapshot name)
    "Look up branch NAME on SNAPSHOT.  Return the branch, or return #f if it
  could not be found."
    (or (find (lambda (branch)
                (string=? (branch-name branch) name))
              (snapshot-branches snapshot))

        ;; There's no API entry point to look up a snapshot branch by name.
        ;; Work around that by using the paginated list of branches provided
        ;; by the /api/1/snapshot API: ask for one branch, and start
        ;; pagination at NAME.
        (match (call (snapshot-url snapshot 1 name)
                     json->snapshot)
          ;; 'call' returns #f upon 404 or when the request is not allowed;
          ;; there is no branch list to inspect in that case.
          (#f #f)
          (page
           (match (snapshot-branches page)
             ((branch)
              (and (string=? (branch-name branch) name)
                   branch))
             (_ #f))))))
  (define (branch-target branch)
    "Return the target of BRANCH, either a <revision> or a <release>."
    ;; Pick the decoder that corresponds to the target type, then fetch the
    ;; target.  Other target types are a match error.
    (let ((decode (match (branch-target-type branch)
                    ('release  json->release)
                    ('revision json->revision))))
      (call (swh-url (branch-target-url branch))
            decode)))
  (define (lookup-origin-revision url tag)
    "Return a <revision> corresponding to the given TAG for the repository
  coming from URL.  Example:

    (lookup-origin-revision \"https://github.com/guix-mirror/guix/\" \"v0.8\")
    => #<<revision> id: \"44941…\" …>

  The information is based on the latest visit of URL available.  Return #f if
  URL could not be found, if it has no complete visit with a snapshot, or if
  TAG could not be found."
    (define (usable-visit? visit)
      ;; True if VISIT is complete and (visit-snapshot VISIT) has a URL to
      ;; query.
      (and (visit-snapshot-url visit)
           (eq? 'full (visit-status visit))))

    (define (lookup-tag snapshot)
      ;; Use 'lookup-snapshot-branch' so that tags beyond the first page of
      ;; branches are found too.
      (or (lookup-snapshot-branch snapshot
                                  (string-append "refs/tags/" tag)) ;Git
          (lookup-snapshot-branch snapshot tag)))                   ;Hg

    (match (lookup-origin url)
      (#f #f)
      (origin
       ;; 'origin-visits' and 'visit-snapshot' go through 'call', which may
       ;; return #f; treat that like "nothing found".
       (match (filter usable-visit? (or (origin-visits origin) '()))
         ((visit . _)
          (let ((snapshot (visit-snapshot visit)))
            (match (and snapshot
                        (and=> (lookup-tag snapshot) branch-target))
              ((? release? release)
               (release-target release))
              ((? revision? revision)
               revision)
              (#f                                   ;tag not found
               #f))))
         (()
          #f)))))
  (define (release-target release)
    "Return the revision that is the target of RELEASE."
    ;; Only releases that target a revision are handled; anything else is a
    ;; match error.
    (match (release-target-type release)
      ('revision
       (let ((url (swh-url (release-target-url release))))
         (call url json->revision)))))
  (define (directory-entry-target entry)
    "If ENTRY, a directory entry, has type 'directory, return its list of
  directory entries; if it has type 'file, return its <content> object."
    (let ((decode (match (directory-entry-type entry)
                    ('directory json->directory-entries)
                    ('file      json->content))))
      (call (swh-url (directory-entry-target-url entry))
            decode)))
  (define* (save-origin url #:optional (type "git"))
    "Request URL to be saved."
    ;; This is a POST request to /api/1/origin/save/TYPE/url/URL; the reply is
    ;; decoded as a <save-reply>.
    (let ((endpoint (swh-url "/api/1/origin/save" type "url" url)))
      (call endpoint json->save-reply http-post*)))
  ;; Query /api/1/origin/save/TYPE/url/URL with a GET request and decode the
  ;; reply as a <save-reply>.
  (define-query (save-origin-status url type)
    "Return the status of a /save request for URL and TYPE (e.g., \"git\")."
    (path "/api/1/origin/save" type "url" url)
    json->save-reply)
  (define* (vault-url id kind #:optional (archive-type 'flat))
    "Return the vault query/cooking URL for ID and KIND.  Normally, ID is an
  SWHID and KIND is #f; the deprecated convention is to set ID to a raw
  directory or revision ID and KIND to 'revision or 'directory."
    ;; Note: /api/1/vault/directory/ID was deprecated in favor of
    ;; /api/1/vault/flat/SWHID; raw identifiers are "converted" to SWHIDs
    ;; here.
    (define swhid
      (match kind
        (#f         id)
        ('revision  (string-append "swh:1:rev:" id))
        ('directory (string-append "swh:1:dir:" id))))

    (swh-url "/api/1/vault" (symbol->string archive-type) swhid))
  (define* (query-vault id #:optional kind #:key (archive-type 'flat))
    "Ask the availability of object ID (an SWHID) to the vault.  Return #f if it
  could not be found, or a <vault-reply> on success.  ARCHIVE-TYPE can be 'flat
  for a tarball containing a directory, or 'git-bare for a tarball containing a
  bare Git repository corresponding to a revision.

  Passing KIND (one of 'directory or 'revision) together with a raw revision or
  directory identifier is deprecated."
    (let ((url (vault-url id kind archive-type)))
      (call url json->vault-reply)))
  (define* (request-cooking id #:optional kind #:key (archive-type 'flat))
    "Request the cooking of object ID, an SWHID.  Return a <vault-reply>.
  ARCHIVE-TYPE can be 'flat for a tarball containing a directory, or 'git-bare
  for a tarball containing a bare Git repository corresponding to a revision.

  Passing KIND (one of 'directory or 'revision) together with a raw revision or
  directory identifier is deprecated."
    ;; Same URL as 'query-vault', but using a POST request.
    (let ((url (vault-url id kind archive-type)))
      (call url json->vault-reply http-post*)))
  (define* (vault-fetch id
                        #:optional kind
                        #:key
                        (archive-type 'flat)
                        (log-port (current-error-port)))
    "Return an input port from which a bundle of the object with the given ID,
  an SWHID, can be read, or #f if the object could not be found.  Progress
  messages are written to LOG-PORT.

  ARCHIVE-TYPE can be 'flat for a tarball containing a directory, or 'git-bare
  for a tarball containing a bare Git repository corresponding to a revision."
    (define (query)
      (query-vault id kind #:archive-type archive-type))

    (define (cook)
      (request-cooking id kind #:archive-type archive-type))

    (define (fetch-bundle reply)
      ;; Return a port on the bundle REPLY refers to, or #f if the response
      ;; code is not 200.
      (let-values (((response port)
                    (http-get* (swh-url (vault-reply-fetch-url reply))
                               #:streaming? #t
                               #:verify-certificate?
                               (%verify-swh-certificate?))))
        (if (= (response-code response) 200)
            port
            (begin                                  ;shouldn't happen
              (close-port port)
              #f))))

    (let loop ((reply (query)))
      (if (not reply)
          ;; Nothing known about ID yet: request cooking and go on with the
          ;; reply to that request, if any.
          (and=> (cook) loop)
          (match (vault-reply-status reply)
            ('done
             (fetch-bundle reply))
            ('failed
             ;; Upon failure, we're supposed to try again.
             (format log-port "SWH vault: failure: ~a~%"
                     (vault-reply-progress-message reply))
             (format log-port "SWH vault: retrying...~%")
             (loop (cook)))
            ((and (or 'new 'pending) status)
             ;; Wait until the bundle shows up.
             (let ((message (vault-reply-progress-message reply)))
               (when (eq? 'new status)
                 (format log-port "SWH vault: requested bundle cooking, waiting for completion...~%"))
               (when (string? message)
                 (format log-port "SWH vault: ~a~%" message))

               ;; Wait long enough so we don't exhaust our maximum number of
               ;; requests per hour too fast (as of this writing, the limit is
               ;; 60 requests per hour per IP address.)
               (sleep (if (eq? status 'new) 60 30))
               (loop (query))))))))
  563. ;;;
  564. ;;; High-level interface.
  565. ;;;
  (define (call-with-temporary-directory proc)      ;FIXME: factorize
    "Call PROC with the name of a temporary directory; delete that directory
  when leaving the dynamic extent of this call."
    ;; The directory is created under $TMPDIR, or under /tmp when that
    ;; variable is unset.
    (let* ((parent  (or (getenv "TMPDIR") "/tmp"))
           (tmp-dir (mkdtemp! (string-append parent
                                             "/guix-directory.XXXXXX"))))
      (dynamic-wind
        (lambda () #t)
        (lambda () (proc tmp-dir))
        (lambda ()
          ;; Errors raised during the clean-up are ignored.
          (false-if-exception (delete-file-recursively tmp-dir))))))
  (define* (swh-download-archive swhid output
                                 #:key
                                 (archive-type 'flat)
                                 (log-port (current-error-port)))
    "Download from Software Heritage the directory or revision with the given
  SWHID, in the ARCHIVE-TYPE format (one of 'flat or 'git-bare), and unpack it
  to OUTPUT.  Return #t on success and #f if the object could not be fetched
  from the vault.  Raise an error if 'tar' exits with a non-zero status."
    (define (unpack input directory)
      ;; Feed INPUT to 'tar' so that it extracts to DIRECTORY, then copy the
      ;; one sub-directory that was extracted to OUTPUT.
      (let ((tar (open-pipe* OPEN_WRITE "tar" "-C" directory
                             (match archive-type
                               ('flat     "-xzvf")  ;gzipped
                               ('git-bare "-xvf"))  ;uncompressed
                             "-")))
        (dump-port input tar)
        (close-port input)
        (let ((status (close-pipe tar)))
          (unless (zero? status)
            (error "tar extraction failure" status)))

        (match (scandir directory)
          (("." ".." sub-directory)
           (copy-recursively (string-append directory "/" sub-directory)
                             output
                             #:log (%make-void-port "w"))
           #t))))

    (call-with-temporary-directory
     (lambda (directory)
       (match (vault-fetch swhid
                           #:archive-type archive-type
                           #:log-port log-port)
         ((? port? input)
          (unpack input directory))
         (#f
          (format log-port
                  "SWH: object ~a could not be fetched from the vault~%"
                  swhid)
          #f)))))
  (define* (swh-download-directory id output
                                   #:key (log-port (current-error-port)))
    "Download from Software Heritage the directory with the given ID, and
  unpack it to OUTPUT.  Return #t on success and #f on failure."
    ;; ID is a raw directory identifier; turn it into an SWHID.
    (let ((swhid (string-append "swh:1:dir:" id)))
      (swh-download-archive swhid output
                            #:archive-type 'flat
                            #:log-port log-port)))
  (define (commit-id? reference)
    "Return true if REFERENCE is likely a commit ID, false otherwise---e.g., if
  it is a tag name.  This is based on a simple heuristic so use with care!"
    ;; The heuristic: exactly 40 characters, all of them hexadecimal digits.
    (and (= 40 (string-length reference))
         (string-every (lambda (chr)
                         (char-set-contains? char-set:hex-digit chr))
                       reference)))
  (define* (swh-download url reference output
                         #:key
                         (archive-type 'flat)
                         (log-port (current-error-port)))
    "Download from Software Heritage a checkout (if ARCHIVE-TYPE is 'flat) or a
  full Git repository (if ARCHIVE-TYPE is 'git-bare) of the Git tag or commit
  REFERENCE originating from URL, and unpack it in OUTPUT.  Return #t on success
  and #f on failure.

  This procedure uses the \"vault\", which contains \"cooked\" directories in
  the form of tarballs.  If the requested directory is not cooked yet, it will
  wait until it becomes available, which could take several minutes."
    (define revision
      ;; Commit IDs are looked up directly; anything else is treated as a tag
      ;; of the repository at URL.
      (if (commit-id? reference)
          (lookup-revision reference)
          (lookup-origin-revision url reference)))

    (match revision
      (#f
       (format log-port
               "SWH: revision ~s originating from ~a could not be found~%"
               reference url)
       #f)
      ((? revision?)
       (format log-port "SWH: found revision ~a with directory at '~a'~%"
               (revision-id revision)
               (swh-url (revision-directory-url revision)))
       ;; A 'flat' archive is made from the directory of the revision, a
       ;; 'git-bare' archive from the revision itself.
       (let ((swhid (match archive-type
                      ('flat
                       (string-append "swh:1:dir:"
                                      (revision-directory revision)))
                      ('git-bare
                       (string-append "swh:1:rev:"
                                      (revision-id revision))))))
         (swh-download-archive swhid output
                               #:archive-type archive-type
                               #:log-port log-port)))))
  653. (format log-port
  654. "SWH: revision ~s originating from ~a could not be found~%"
  655. reference url)
  656. #f)))