serialization.scm 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
  3. ;;;
  4. ;;; This file is part of GNU Guix.
  5. ;;;
  6. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  7. ;;; under the terms of the GNU General Public License as published by
  8. ;;; the Free Software Foundation; either version 3 of the License, or (at
  9. ;;; your option) any later version.
  10. ;;;
  11. ;;; GNU Guix is distributed in the hope that it will be useful, but
  12. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. ;;; GNU General Public License for more details.
  15. ;;;
  16. ;;; You should have received a copy of the GNU General Public License
  17. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  18. (define-module (guix serialization)
  19. #:use-module (guix combinators)
  20. #:use-module (rnrs bytevectors)
  21. #:use-module (srfi srfi-1)
  22. #:use-module (srfi srfi-26)
  23. #:use-module (srfi srfi-34)
  24. #:use-module (srfi srfi-35)
  25. #:use-module (ice-9 binary-ports)
  26. #:use-module ((ice-9 rdelim) #:prefix rdelim:)
  27. #:use-module (ice-9 match)
  28. #:use-module (ice-9 ftw)
  29. #:use-module (system foreign)
  30. #:export (write-int read-int
  31. write-long-long read-long-long
  32. write-padding
  33. write-bytevector write-string
  34. read-string read-latin1-string read-maybe-utf8-string
  35. write-string-list read-string-list
  36. write-string-pairs read-string-pairs
  37. write-store-path read-store-path
  38. write-store-path-list read-store-path-list
  39. (dump . dump-port*)
  40. &nar-error
  41. nar-error?
  42. nar-error-port
  43. nar-error-file
  44. &nar-read-error
  45. nar-read-error?
  46. nar-read-error-token
  47. write-file
  48. write-file-tree
  49. fold-archive
  50. restore-file
  51. dump-file))
  52. ;;; Comment:
  53. ;;;
  54. ;;; Serialization procedures used by the RPCs and the Nar format. This module
  55. ;;; is for internal consumption.
  56. ;;;
  57. ;;; Code:
  58. ;; Similar to serialize.cc in Nix.
  59. (define-condition-type &nar-error &error ; XXX: inherit from &store-error ?
  60. nar-error?
  61. (file nar-error-file) ; file we were restoring, or #f
  62. (port nar-error-port)) ; port from which we read
  63. (define currently-restored-file
  64. ;; Name of the file being restored. Used internally for error reporting.
  65. (make-parameter #f))
  66. (define (get-bytevector-n* port count)
  67. (let ((bv (get-bytevector-n port count)))
  68. (when (or (eof-object? bv)
  69. (< (bytevector-length bv) count))
  70. (raise (condition (&nar-error
  71. (file (currently-restored-file))
  72. (port port)))))
  73. bv))
  74. (define (sub-bytevector bv len)
  75. "Return a bytevector that aliases the first LEN bytes of BV."
  76. (define max (bytevector-length bv))
  77. (cond ((= len max) bv)
  78. ((< len max)
  79. ;; Yes, this is safe because the result of each conversion procedure
  80. ;; has its life cycle synchronized with that of its argument.
  81. (pointer->bytevector (bytevector->pointer bv) len))
  82. (else
  83. (error "sub-bytevector called to get a super bytevector"))))
  84. (define (write-int n p)
  85. (let ((b (make-bytevector 8 0)))
  86. (bytevector-u32-set! b 0 n (endianness little))
  87. (put-bytevector p b)))
  88. (define (read-int p)
  89. (let ((b (get-bytevector-n* p 8)))
  90. (bytevector-u32-ref b 0 (endianness little))))
  91. (define (write-long-long n p)
  92. (let ((b (make-bytevector 8 0)))
  93. (bytevector-u64-set! b 0 n (endianness little))
  94. (put-bytevector p b)))
  95. (define (read-long-long p)
  96. (let ((b (get-bytevector-n* p 8)))
  97. (bytevector-u64-ref b 0 (endianness little))))
  98. (define write-padding
  99. (let ((zero (make-bytevector 8 0)))
  100. (lambda (n p)
  101. (let ((m (modulo n 8)))
  102. (or (zero? m)
  103. (put-bytevector p zero 0 (- 8 m)))))))
  104. (define* (write-bytevector s p
  105. #:optional (l (bytevector-length s)))
  106. (let* ((m (modulo l 8))
  107. (b (make-bytevector (+ 8 l (if (zero? m) 0 (- 8 m))))))
  108. (bytevector-u32-set! b 0 l (endianness little))
  109. (bytevector-copy! s 0 b 8 l)
  110. (put-bytevector p b)))
  111. (define (write-string s p)
  112. (write-bytevector (string->utf8 s) p))
  113. (define (read-byte-string p)
  114. (let* ((len (read-int p))
  115. (m (modulo len 8))
  116. (pad (if (zero? m) 0 (- 8 m)))
  117. (bv (get-bytevector-n* p (+ len pad))))
  118. (sub-bytevector bv len)))
  119. (define (read-string p)
  120. (utf8->string (read-byte-string p)))
  121. (define (read-latin1-string p)
  122. "Read an ISO-8859-1 string from P."
  123. ;; Note: do not use 'get-string-n' to work around Guile bug
  124. ;; <http://bugs.gnu.org/19621>. See <http://bugs.gnu.org/19610> for
  125. ;; a discussion.
  126. (let ((bv (read-byte-string p)))
  127. ;; XXX: Rewrite using (ice-9 iconv).
  128. (list->string (map integer->char (bytevector->u8-list bv)))))
  129. (define (read-maybe-utf8-string p)
  130. "Read a serialized string from port P. Attempt to decode it as UTF-8 and
  131. substitute invalid byte sequences with question marks. This is a
  132. \"permissive\" UTF-8 decoder."
  133. ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
  134. ;; and substitute invalid byte sequences with question marks, but this is
  135. ;; not very efficient. Eventually Guile may provide a lightweight
  136. ;; permissive UTF-8 decoder.
  137. (let* ((bv (read-byte-string p))
  138. (port (open-bytevector-input-port bv)))
  139. (set-port-encoding! port "UTF-8")
  140. (set-port-conversion-strategy! port 'substitute)
  141. (rdelim:read-string port)))
  142. (define (write-string-list l p)
  143. (write-int (length l) p)
  144. (for-each (cut write-string <> p) l))
  145. (define (read-string-list p)
  146. (let ((len (read-int p)))
  147. (unfold (cut >= <> len)
  148. (lambda (i)
  149. (read-string p))
  150. 1+
  151. 0)))
  152. (define (write-string-pairs l p)
  153. (write-int (length l) p)
  154. (for-each (match-lambda
  155. ((first . second)
  156. (write-string first p)
  157. (write-string second p)))
  158. l))
  159. (define (read-string-pairs p)
  160. (let ((len (read-int p)))
  161. (unfold (cut >= <> len)
  162. (lambda (i)
  163. (cons (read-string p) (read-string p)))
  164. 1+
  165. 0)))
  166. (define (write-store-path f p)
  167. (write-string f p)) ; TODO: assert path
  168. (define (read-store-path p)
  169. (read-string p)) ; TODO: assert path
  170. (define write-store-path-list write-string-list)
  171. (define read-store-path-list read-string-list)
  172. (define-syntax write-literal-strings
  173. (lambda (s)
  174. "Write the given literal strings to PORT in an optimized fashion, without
  175. any run-time allocations or computations."
  176. (define (padding len)
  177. (let ((m (modulo len 8)))
  178. (if (zero? m)
  179. 0
  180. (- 8 m))))
  181. (syntax-case s ()
  182. ((_ port strings ...)
  183. (let* ((bytes (map string->utf8 (syntax->datum #'(strings ...))))
  184. (len (fold (lambda (bv size)
  185. (+ size 8 (bytevector-length bv)
  186. (padding (bytevector-length bv))))
  187. 0
  188. bytes))
  189. (bv (make-bytevector len))
  190. (zeros (make-bytevector 8 0)))
  191. (fold (lambda (str offset)
  192. (let ((len (bytevector-length str)))
  193. (bytevector-u32-set! bv offset len (endianness little))
  194. (bytevector-copy! str 0 bv (+ 8 offset) len)
  195. (bytevector-copy! zeros 0 bv (+ 8 offset len)
  196. (padding len))
  197. (+ offset 8 len (padding len))))
  198. 0
  199. bytes)
  200. #`(put-bytevector port #,bv))))))
  201. (define-condition-type &nar-read-error &nar-error
  202. nar-read-error?
  203. (token nar-read-error-token)) ; faulty token, or #f
  204. (define (dump in out size)
  205. "Copy SIZE bytes from IN to OUT."
  206. (define buf-size 65536)
  207. (define buf (make-bytevector buf-size))
  208. (let loop ((left size))
  209. (if (<= left 0)
  210. 0
  211. (let ((read (get-bytevector-n! in buf 0 (min left buf-size))))
  212. (if (eof-object? read)
  213. left
  214. (begin
  215. (put-bytevector out buf 0 read)
  216. (loop (- left read))))))))
  217. (define (write-contents-from-port input output size)
  218. "Write SIZE bytes from port INPUT to port OUTPUT."
  219. (write-string "contents" output)
  220. (write-long-long size output)
  221. ;; Use 'sendfile' when both OUTPUT and INPUT are file ports.
  222. (if (and (file-port? output) (file-port? input))
  223. (sendfile output input size 0)
  224. (dump input output size))
  225. (write-padding size output))
  226. (define (read-file-type port)
  227. "Read the file type tag from PORT, and return either 'regular or
  228. 'executable."
  229. (match (read-string port)
  230. ("contents"
  231. 'regular)
  232. ("executable"
  233. (match (list (read-string port) (read-string port))
  234. (("" "contents") 'executable)
  235. (x (raise
  236. (condition (&message
  237. (message "unexpected executable file marker"))
  238. (&nar-read-error (port port)
  239. (file #f)
  240. (token x)))))))
  241. (x
  242. (raise
  243. (condition (&message (message "unsupported nar file type"))
  244. (&nar-read-error (port port) (file #f) (token x)))))))
  245. (define %archive-version-1
  246. ;; Magic cookie for Nix archives.
  247. "nix-archive-1")
  248. (define* (write-file file port
  249. #:key (select? (const #t)))
  250. "Write the contents of FILE to PORT in Nar format, recursing into
  251. sub-directories of FILE as needed. For each directory entry, call (SELECT?
  252. FILE STAT), where FILE is the entry's absolute file name and STAT is the
  253. result of 'lstat'; exclude entries for which SELECT? does not return true."
  254. (write-file-tree file port
  255. #:file-type+size
  256. (lambda (file)
  257. (let* ((stat (lstat file))
  258. (size (stat:size stat)))
  259. (case (stat:type stat)
  260. ((directory)
  261. (values 'directory size))
  262. ((regular)
  263. (values (if (zero? (logand (stat:mode stat)
  264. #o100))
  265. 'regular
  266. 'executable)
  267. size))
  268. (else
  269. (values (stat:type stat) size))))) ;bah!
  270. #:file-port (cut open-file <> "r0b")
  271. #:symlink-target readlink
  272. #:directory-entries
  273. (lambda (directory)
  274. ;; 'scandir' defaults to 'string-locale<?' to sort files,
  275. ;; but this happens to be case-insensitive (at least in
  276. ;; 'en_US' locale on libc 2.18.) Conversely, we want
  277. ;; files to be sorted in a case-sensitive fashion.
  278. (define basenames
  279. (scandir directory (negate (cut member <> '("." "..")))
  280. string<?))
  281. (filter-map (lambda (base)
  282. (let ((file (string-append directory
  283. "/" base)))
  284. (and (select? file (lstat file))
  285. base)))
  286. basenames))
  287. ;; The 'scandir' call above gives us filtered and sorted
  288. ;; entries, so no post-processing is needed.
  289. #:postprocess-entries identity))
  290. (define (filter/sort-directory-entries lst)
  291. "Remove dot and dot-dot entries from LST, and sort it in lexicographical
  292. order."
  293. (delete-duplicates
  294. (sort (remove (cute member <> '("." "..")) lst)
  295. string<?)
  296. string=?))
  297. (define* (write-file-tree file port
  298. #:key
  299. file-type+size
  300. file-port
  301. symlink-target
  302. directory-entries
  303. (postprocess-entries filter/sort-directory-entries))
  304. "Write the contents of FILE to PORT in Nar format, recursing into
  305. sub-directories of FILE as needed.
  306. This procedure does not make any file-system I/O calls. Instead, it calls the
  307. user-provided FILE-TYPE+SIZE, FILE-PORT, SYMLINK-TARGET, and DIRECTORY-ENTRIES
  308. procedures, which roughly correspond to 'lstat', 'readlink', and 'scandir'.
  309. POSTPROCESS-ENTRIES ensures that directory entries are valid; leave it as-is
  310. unless you know that DIRECTORY-ENTRIES provide filtered and sorted entries, in
  311. which case you can use 'identity'."
  312. (define p port)
  313. (write-string %archive-version-1 p)
  314. (let dump ((f file))
  315. (define-values (type size)
  316. (file-type+size f))
  317. (write-literal-strings p "(")
  318. (case type
  319. ((regular executable)
  320. (write-literal-strings p "type" "regular")
  321. (when (eq? 'executable type)
  322. (write-literal-strings p "executable" ""))
  323. (let ((input (file-port f)))
  324. (dynamic-wind
  325. (const #t)
  326. (lambda ()
  327. (write-contents-from-port input p size))
  328. (lambda ()
  329. (close-port input)))))
  330. ((directory)
  331. (write-literal-strings p "type" "directory")
  332. (let ((entries (postprocess-entries (directory-entries f))))
  333. (for-each (lambda (e)
  334. (let* ((f (string-append f "/" e)))
  335. (write-literal-strings p "entry" "(" "name")
  336. (write-string e p)
  337. (write-literal-strings p "node")
  338. (dump f)
  339. (write-literal-strings p ")")))
  340. entries)))
  341. ((symlink)
  342. (write-literal-strings p "type" "symlink" "target")
  343. (write-string (symlink-target f) p))
  344. (else
  345. (raise (condition (&message (message "unsupported file type"))
  346. (&nar-error (file f) (port port))))))
  347. (write-literal-strings p ")")))
  348. (define port-conversion-strategy
  349. (fluid->parameter %default-port-conversion-strategy))
  350. (define (fold-archive proc seed port file)
  351. "Read a file (possibly a directory structure) in Nar format from PORT. Call
  352. PROC on each file or directory read from PORT using:
  353. (PROC FILE TYPE CONTENTS RESULT)
  354. using SEED as the first RESULT. TYPE is a symbol like 'regular, and CONTENTS
  355. depends on TYPE."
  356. (parameterize ((currently-restored-file file)
  357. ;; Error out if we can convert file names to the current
  358. ;; locale. (XXX: We'd prefer UTF-8 encoding for file names
  359. ;; regardless of the locale, but that's what Guile gives us
  360. ;; so far.)
  361. (port-conversion-strategy 'error))
  362. (let ((signature (read-string port)))
  363. (unless (equal? signature %archive-version-1)
  364. (raise
  365. (condition (&message (message "invalid nar signature"))
  366. (&nar-read-error (port port)
  367. (token signature)
  368. (file #f))))))
  369. (let read ((file file)
  370. (result seed))
  371. (define (read-eof-marker)
  372. (match (read-string port)
  373. (")" #t)
  374. (x (raise
  375. (condition
  376. (&message (message "invalid nar end-of-file marker"))
  377. (&nar-read-error (port port) (file file) (token x)))))))
  378. (currently-restored-file file)
  379. (match (list (read-string port) (read-string port) (read-string port))
  380. (("(" "type" "regular")
  381. (let* ((type (read-file-type port))
  382. (size (read-long-long port))
  383. ;; The caller must read exactly SIZE bytes from PORT.
  384. (result (proc file type `(,port . ,size) result)))
  385. (let ((m (modulo size 8)))
  386. (unless (zero? m)
  387. (get-bytevector-n* port (- 8 m))))
  388. (read-eof-marker)
  389. result))
  390. (("(" "type" "symlink")
  391. (match (list (read-string port) (read-string port))
  392. (("target" target)
  393. (let ((result (proc file 'symlink target result)))
  394. (read-eof-marker)
  395. result))
  396. (x (raise
  397. (condition
  398. (&message (message "invalid symlink tokens"))
  399. (&nar-read-error (port port) (file file) (token x)))))))
  400. (("(" "type" "directory")
  401. (let ((dir file))
  402. (let loop ((prefix (read-string port))
  403. (result (proc file 'directory #f result)))
  404. (match prefix
  405. ("entry"
  406. (match (list (read-string port)
  407. (read-string port) (read-string port)
  408. (read-string port))
  409. (("(" "name" file "node")
  410. (let ((result (read (string-append dir "/" file) result)))
  411. (match (read-string port)
  412. (")" #f)
  413. (x
  414. (raise
  415. (condition
  416. (&message
  417. (message "unexpected directory entry termination"))
  418. (&nar-read-error (port port)
  419. (file file)
  420. (token x))))))
  421. (loop (read-string port) result)))))
  422. (")" ;done with DIR
  423. (proc file 'directory-complete #f result))
  424. (x
  425. (raise
  426. (condition
  427. (&message (message "unexpected directory inter-entry marker"))
  428. (&nar-read-error (port port) (file file) (token x)))))))))
  429. (x
  430. (raise
  431. (condition
  432. (&message (message "unsupported nar entry type"))
  433. (&nar-read-error (port port) (file file) (token x)))))))))
  434. (define (dump-file file input size type)
  435. "Dump SIZE bytes from INPUT to FILE.
  436. This procedure is suitable for use as the #:dump-file argument to
  437. 'restore-file'."
  438. (call-with-output-file file
  439. (lambda (output)
  440. (dump input output size))))
  441. (define* (restore-file port file
  442. #:key (dump-file dump-file))
  443. "Read a file (possibly a directory structure) in Nar format from PORT.
  444. Restore it as FILE with canonical permissions and timestamps. To write a
  445. regular or executable file, call:
  446. (DUMP-FILE FILE INPUT SIZE TYPE)
  447. The default is to dump SIZE bytes from INPUT to FILE, but callers can provide
  448. a custom procedure, for instance to deduplicate FILE on the fly."
  449. (fold-archive (lambda (file type content result)
  450. (match type
  451. ('directory
  452. (mkdir file))
  453. ('directory-complete
  454. (chmod file #o555)
  455. (utime file 1 1 0 0))
  456. ('symlink
  457. (symlink content file)
  458. (utime file 1 1 0 0 AT_SYMLINK_NOFOLLOW))
  459. ((or 'regular 'executable)
  460. (match content
  461. ((input . size)
  462. (dump-file file input size type)
  463. (chmod file (if (eq? type 'executable)
  464. #o555
  465. #o444))
  466. (utime file 1 1 0 0))))))
  467. #t
  468. port
  469. file))
  470. ;;; Local Variables:
  471. ;;; eval: (put 'call-with-binary-input-file 'scheme-indent-function 1)
  472. ;;; End:
  473. ;;; serialization.scm ends here