graft.scm 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2014, 2015, 2016, 2018 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2016, 2021 Mark H Weaver <mhw@netris.org>
  4. ;;;
  5. ;;; This file is part of GNU Guix.
  6. ;;;
  7. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  8. ;;; under the terms of the GNU General Public License as published by
  9. ;;; the Free Software Foundation; either version 3 of the License, or (at
  10. ;;; your option) any later version.
  11. ;;;
  12. ;;; GNU Guix is distributed in the hope that it will be useful, but
  13. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;;; GNU General Public License for more details.
  16. ;;;
  17. ;;; You should have received a copy of the GNU General Public License
  18. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  19. (define-module (guix build graft)
  20. #:use-module (guix build utils)
  21. #:use-module (guix build debug-link)
  22. #:use-module (rnrs bytevectors)
  23. #:use-module (ice-9 vlist)
  24. #:use-module (ice-9 match)
  25. #:use-module (ice-9 threads)
  26. #:use-module (ice-9 binary-ports)
  27. #:use-module (srfi srfi-1) ; list library
  28. #:use-module (srfi srfi-26) ; cut and cute
  29. #:export (replace-store-references
  30. rewrite-directory
  31. graft))
  32. ;;; Commentary:
  33. ;;;
  34. ;;; This module supports "grafts". Grafting a directory means rewriting it,
  35. ;;; with references to some specific items replaced by references to other
  36. ;;; store items---the grafts.
  37. ;;;
  38. ;;; This method is used to provide fast security updates as only the leaves of
  39. ;;; the dependency graph need to be grafted, even when the security updates
  40. ;;; affect a core component such as Bash or libc. It is based on the idea of
  41. ;;; 'replace-dependency' implemented by Shea Levy in Nixpkgs.
  42. ;;;
  43. ;;; Code:
  44. (define-syntax-rule (define-inline name val)
  45. (define-syntax name (identifier-syntax val)))
  46. (define-inline hash-length 32)
  47. (define nix-base32-char?
  48. (cute char-set-contains?
  49. ;; ASCII digits and lower case letters except e o t u
  50. (string->char-set "0123456789abcdfghijklmnpqrsvwxyz")
  51. <>))
  52. (define (nix-base32-char-or-nul? c)
  53. "Return true if C is a nix-base32 character or NUL, otherwise return false."
  54. (or (nix-base32-char? c)
  55. (char=? c #\nul)))
  56. (define (possible-utf16-hash? buffer i w)
  57. "Return true if (I - W) is large enough to hold a UTF-16 encoded
  58. nix-base32 hash and if BUFFER contains NULs in all positions where NULs
  59. are to be expected in a UTF-16 encoded hash+dash pattern whose dash is
  60. found at position I. Otherwise, return false."
  61. (and (<= (* 2 hash-length) (- i w))
  62. (let loop ((j (+ 1 (- i (* 2 hash-length)))))
  63. (or (>= j i)
  64. (and (zero? (bytevector-u8-ref buffer j))
  65. (loop (+ j 2)))))))
  66. (define (possible-utf32-hash? buffer i w)
  67. "Return true if (I - W) is large enough to hold a UTF-32 encoded
  68. nix-base32 hash and if BUFFER contains NULs in all positions where NULs
  69. are to be expected in a UTF-32 encoded hash+dash pattern whose dash is
  70. found at position I. Otherwise, return false."
  71. (and (<= (* 4 hash-length) (- i w))
  72. (let loop ((j (+ 1 (- i (* 4 hash-length)))))
  73. (or (>= j i)
  74. (and (zero? (bytevector-u8-ref buffer j))
  75. (zero? (bytevector-u8-ref buffer (+ j 1)))
  76. (zero? (bytevector-u8-ref buffer (+ j 2)))
  77. (loop (+ j 4)))))))
  78. (define (insert-nuls char-size bv)
  79. "Given a bytevector BV, return a bytevector containing the same bytes but
  80. with (CHAR-SIZE - 1) NULs inserted between every two adjacent bytes from BV.
  81. For example, (insert-nuls 4 #u8(1 2 3)) => #u8(1 0 0 0 2 0 0 0 3)."
  82. (if (= char-size 1)
  83. bv
  84. (let* ((len (bytevector-length bv))
  85. (bv* (make-bytevector (+ 1 (* char-size
  86. (- len 1)))
  87. 0)))
  88. (let loop ((i 0))
  89. (when (< i len)
  90. (bytevector-u8-set! bv* (* i char-size)
  91. (bytevector-u8-ref bv i))
  92. (loop (+ i 1))))
  93. bv*)))
  94. (define* (replace-store-references input output replacement-table
  95. #:optional (store (%store-directory)))
  96. "Read data from INPUT, replacing store references according to
  97. REPLACEMENT-TABLE, and writing the result to OUTPUT. REPLACEMENT-TABLE is a
  98. vhash that maps strings (original hashes) to bytevectors (replacement strings
  99. comprising the replacement hash, a dash, and a string).
  100. Note: We use string keys to work around the fact that guile-2.0 hashes all
  101. bytevectors to the same value."
  102. (define (lookup-replacement s)
  103. (match (vhash-assoc s replacement-table)
  104. ((origin . replacement)
  105. replacement)
  106. (#f #f)))
  107. (define (optimize-u8-predicate pred)
  108. (cute vector-ref
  109. (list->vector (map pred (iota 256)))
  110. <>))
  111. (define nix-base32-byte-or-nul?
  112. (optimize-u8-predicate
  113. (compose nix-base32-char-or-nul?
  114. integer->char)))
  115. (define (dash? byte) (= byte 45))
  116. (define request-size (expt 2 20)) ; 1 MiB
  117. ;; We scan the file for the following 33-byte pattern: 32 bytes of
  118. ;; nix-base32 characters followed by a dash. When we find such a pattern
  119. ;; whose hash is in REPLACEMENT-TABLE, we perform the required rewrite and
  120. ;; continue scanning.
  121. ;;
  122. ;; To support UTF-16 and UTF-32 store references, the 33 bytes comprising
  123. ;; this hash+dash pattern may optionally be interspersed by extra NUL bytes.
  124. ;; This simple approach works because the characters we are looking for are
  125. ;; restricted to ASCII. UTF-16 hashes are interspersed with single NUL
  126. ;; bytes ("\0"), and UTF-32 hashes are interspersed with triplets of NULs
  127. ;; ("\0\0\0"). Note that we require NULs to be present only *between* the
  128. ;; other bytes, and not at either end, in order to be insensitive to byte
  129. ;; order.
  130. ;;
  131. ;; To accommodate large files, we do not read the entire file at once, but
  132. ;; instead work on buffers of up to REQUEST-SIZE bytes. To ensure that
  133. ;; every hash+dash pattern appears in its entirety in at least one buffer,
  134. ;; adjacent buffers must overlap by one byte less than the maximum size of a
  135. ;; hash+dash pattern. We accomplish this by "ungetting" a suffix of each
  136. ;; buffer before reading the next buffer, unless we know that we've reached
  137. ;; the end-of-file.
  138. (let ((buffer (make-bytevector request-size)))
  139. (define-syntax-rule (byte-at i)
  140. (bytevector-u8-ref buffer i))
  141. (let outer-loop ()
  142. (match (get-bytevector-n! input buffer 0 request-size)
  143. ((? eof-object?) 'done)
  144. (end
  145. (define (scan-from i w)
  146. ;; Scan the buffer for dashes that might be preceded by nix hashes,
  147. ;; where I is the minimum position where such a dash might be
  148. ;; found, and W is the number of bytes in the buffer that have been
  149. ;; written so far. We assume that I - W >= HASH-LENGTH.
  150. ;;
  151. ;; The key optimization here is that whenever we find a byte at
  152. ;; position I that cannot occur within a nix hash (because it's
  153. ;; neither a nix-base32 character nor NUL), we can infer that the
  154. ;; earliest position where the next hash could start is at I + 1,
  155. ;; and therefore the earliest position for the following dash is
  156. ;; (+ I 1 HASH-LENGTH), which is I + 33.
  157. ;;
  158. ;; Since nix-base32-or-nul characters comprise only about 1/8 of
  159. ;; the 256 possible byte values, and exclude some of the most
  160. ;; common letters in English text (e t o u), we can advance 33
  161. ;; positions much of the time.
  162. (if (< i end)
  163. (let ((byte (byte-at i)))
  164. (cond ((dash? byte)
  165. (found-dash i w))
  166. ((nix-base32-byte-or-nul? byte)
  167. (scan-from (+ i 1) w))
  168. (else
  169. (not-part-of-hash i w))))
  170. (finish-buffer i w)))
  171. (define (not-part-of-hash i w)
  172. ;; Position I is known to not be within a nix hash that we must
  173. ;; rewrite. Therefore, the earliest position where the next hash
  174. ;; might start is I + 1, and therefore the earliest position of
  175. ;; the following dash is (+ I 1 HASH-LENGTH).
  176. (scan-from (+ i 1 hash-length) w))
  177. (define (found-dash i w)
  178. ;; We know that there is a dash '-' at position I, and that
  179. ;; I - W >= HASH-LENGTH. The immediately preceding bytes *might*
  180. ;; contain a nix-base32 hash, but that is not yet known. Here,
  181. ;; we rule out all but one possible encoding (ASCII, UTF-16,
  182. ;; UTF-32) by counting how many NULs precede the dash.
  183. (cond ((not (zero? (byte-at (- i 1))))
  184. ;; The dash is *not* preceded by a NUL, therefore it
  185. ;; cannot possibly be a UTF-16 or UTF-32 hash. Proceed
  186. ;; to check for an ASCII hash.
  187. (found-possible-hash 1 i w))
  188. ((not (zero? (byte-at (- i 2))))
  189. ;; The dash is preceded by exactly one NUL, therefore it
  190. ;; cannot be an ASCII or UTF-32 hash. Proceed to check
  191. ;; for a UTF-16 hash.
  192. (if (possible-utf16-hash? buffer i w)
  193. (found-possible-hash 2 i w)
  194. (not-part-of-hash i w)))
  195. (else
  196. ;; The dash is preceded by at least two NULs, therefore
  197. ;; it cannot be an ASCII or UTF-16 hash. Proceed to
  198. ;; check for a UTF-32 hash.
  199. (if (possible-utf32-hash? buffer i w)
  200. (found-possible-hash 4 i w)
  201. (not-part-of-hash i w)))))
  202. (define (found-possible-hash char-size i w)
  203. ;; We know that there is a dash '-' at position I, that
  204. ;; I - W >= CHAR-SIZE * HASH-LENGTH, and that the only
  205. ;; possible encoding for the preceding hash is as indicated by
  206. ;; CHAR-SIZE. Here we check to see if the given hash is in
  207. ;; REPLACEMENT-TABLE, and if so, we perform the required
  208. ;; rewrite.
  209. (let* ((hash (string-tabulate
  210. (lambda (j)
  211. (integer->char
  212. (byte-at (- i (* char-size
  213. (- hash-length j))))))
  214. hash-length))
  215. (replacement* (lookup-replacement hash))
  216. (replacement (and replacement*
  217. (insert-nuls char-size replacement*))))
  218. (cond
  219. ((not replacement)
  220. (not-part-of-hash i w))
  221. (else
  222. ;; We've found a hash that needs to be replaced.
  223. ;; First, write out all bytes preceding the hash
  224. ;; that have not yet been written.
  225. (put-bytevector output buffer w
  226. (- i (* char-size hash-length) w))
  227. ;; Now write the replacement string.
  228. (put-bytevector output replacement)
  229. ;; Now compute the new values of W and I and continue.
  230. (let ((w (+ (- i (* char-size hash-length))
  231. (bytevector-length replacement))))
  232. (scan-from (+ w hash-length) w))))))
  233. (define (finish-buffer i w)
  234. ;; We have finished scanning the buffer. Now we determine how many
  235. ;; bytes have not yet been written, and how many bytes to "unget".
  236. ;; If END is less than REQUEST-SIZE then we read less than we asked
  237. ;; for, which indicates that we are at EOF, so we needn't unget
  238. ;; anything. Otherwise, we unget up to (* 4 HASH-LENGTH) bytes.
  239. ;; However, we must be careful not to unget bytes that have already
  240. ;; been written, because that would cause them to be written again
  241. ;; from the next buffer. In practice, this case occurs when a
  242. ;; replacement is made near or beyond the end of the buffer. When
  243. ;; REPLACEMENT went beyond END, we consume the extra bytes from
  244. ;; INPUT.
  245. (if (> w end)
  246. (get-bytevector-n! input buffer 0 (- w end))
  247. (let* ((unwritten (- end w))
  248. (unget-size (if (= end request-size)
  249. (min (* 4 hash-length)
  250. unwritten)
  251. 0))
  252. (write-size (- unwritten unget-size)))
  253. (put-bytevector output buffer w write-size)
  254. (unget-bytevector input buffer (+ w write-size)
  255. unget-size)))
  256. (outer-loop))
  257. (scan-from hash-length 0))))))
  258. (define (rename-matching-files directory mapping)
  259. "Apply MAPPING to the names of all the files in DIRECTORY, where MAPPING is
  260. a list of store file name pairs."
  261. (let* ((mapping (map (match-lambda
  262. ((source . target)
  263. (cons (basename source) (basename target))))
  264. mapping))
  265. (matches (find-files directory
  266. (lambda (file stat)
  267. (assoc-ref mapping (basename file)))
  268. #:directories? #t)))
  269. ;; XXX: This is not quite correct: if MAPPING contains "foo", and
  270. ;; DIRECTORY contains "bar/foo/foo", we first rename "bar/foo" and then
  271. ;; "bar/foo/foo" no longer exists so we fail. Oh well, surely that's good
  272. ;; enough!
  273. (for-each (lambda (file)
  274. (let ((target (assoc-ref mapping (basename file))))
  275. (rename-file file
  276. (string-append (dirname file) "/" target))))
  277. matches)))
  278. (define (exit-on-exception proc)
  279. "Return a procedure that wraps PROC so that 'primitive-exit' is called when
  280. an exception is caught."
  281. (lambda (arg)
  282. (catch #t
  283. (lambda ()
  284. (proc arg))
  285. (lambda (key . args)
  286. ;; Since ports are not thread-safe as of Guile 2.0, reopen stderr.
  287. (let ((port (fdopen 2 "w0")))
  288. (print-exception port #f key args)
  289. (primitive-exit 1))))))
  290. ;; We need this as long as we support Guile < 2.0.13.
  291. (define* (mkdir-p* dir #:optional (mode #o755))
  292. "This is a variant of 'mkdir-p' that works around
  293. <http://bugs.gnu.org/24659> by passing MODE explicitly in each 'mkdir' call."
  294. (define absolute?
  295. (string-prefix? "/" dir))
  296. (define not-slash
  297. (char-set-complement (char-set #\/)))
  298. (let loop ((components (string-tokenize dir not-slash))
  299. (root (if absolute?
  300. ""
  301. ".")))
  302. (match components
  303. ((head tail ...)
  304. (let ((path (string-append root "/" head)))
  305. (catch 'system-error
  306. (lambda ()
  307. (mkdir path mode)
  308. (loop tail path))
  309. (lambda args
  310. (if (= EEXIST (system-error-errno args))
  311. (loop tail path)
  312. (apply throw args))))))
  313. (() #t))))
  314. (define* (rewrite-directory directory output mapping
  315. #:optional (store (%store-directory)))
  316. "Copy DIRECTORY to OUTPUT, replacing strings according to MAPPING, a list of
  317. file name pairs."
  318. (define hash-mapping
  319. ;; List of hash/replacement pairs, where the hash is a nix-base32 string
  320. ;; and the replacement is a string that includes the replacement's name,
  321. ;; like "r837zajjc1q8z9hph4b6860a9c05blyy-openssl-1.0.2j".
  322. (let* ((prefix (string-append store "/"))
  323. (start (string-length prefix))
  324. (end (+ start hash-length)))
  325. (define (valid-hash? h)
  326. (every nix-base32-char? (string->list h)))
  327. (define (hash+rest s)
  328. (and (< end (string-length s))
  329. (let ((hash (substring s start end))
  330. (all (substring s start)))
  331. (and (string-prefix? prefix s)
  332. (valid-hash? hash)
  333. (eqv? #\- (string-ref s end))
  334. (list hash all)))))
  335. (map (match-lambda
  336. (((= hash+rest (origin-hash origin-string))
  337. .
  338. (= hash+rest (replacement-hash replacement-string)))
  339. (unless (= (string-length origin-string)
  340. (string-length replacement-string))
  341. (error "replacement length differs from the original length"
  342. origin-string replacement-string))
  343. (cons origin-hash (string->utf8 replacement-string)))
  344. ((origin . replacement)
  345. (error "invalid replacement" origin replacement)))
  346. mapping)))
  347. (define replacement-table
  348. (alist->vhash hash-mapping))
  349. (define prefix-len
  350. (string-length directory))
  351. (define (destination file)
  352. (string-append output (string-drop file prefix-len)))
  353. (define (rewrite-leaf file)
  354. (let ((stat (lstat file))
  355. (dest (destination file)))
  356. (mkdir-p* (dirname dest))
  357. (case (stat:type stat)
  358. ((symlink)
  359. (let ((target (readlink file)))
  360. (symlink (call-with-output-string
  361. (lambda (output)
  362. (replace-store-references (open-input-string target)
  363. output replacement-table
  364. store)))
  365. dest)))
  366. ((regular)
  367. (call-with-input-file file
  368. (lambda (input)
  369. (call-with-output-file dest
  370. (lambda (output)
  371. (replace-store-references input output replacement-table
  372. store)
  373. (chmod output (stat:perms stat)))))))
  374. ((directory)
  375. (mkdir-p* dest))
  376. (else
  377. (error "unsupported file type" stat)))))
  378. ;; Use 'exit-on-exception' to force an exit upon I/O errors, given that
  379. ;; 'n-par-for-each' silently swallows exceptions.
  380. ;; See <http://bugs.gnu.org/23581>.
  381. (n-par-for-each (parallel-job-count)
  382. (exit-on-exception rewrite-leaf)
  383. (find-files directory (const #t)
  384. #:directories? #t))
  385. (rename-matching-files output mapping))
  386. (define %graft-hooks
  387. ;; Default list of hooks run after grafting.
  388. (list graft-debug-links))
  389. (define* (graft old-outputs new-outputs mapping
  390. #:key (log-port (current-output-port))
  391. (hooks %graft-hooks))
  392. "Apply the grafts described by MAPPING on OLD-OUTPUTS, leading to
  393. NEW-OUTPUTS. MAPPING must be a list of file name pairs; OLD-OUTPUTS and
  394. NEW-OUTPUTS are lists of output name/file name pairs."
  395. (for-each (lambda (input output)
  396. (format log-port "grafting '~a' -> '~a'...~%" input output)
  397. (force-output)
  398. (rewrite-directory input output mapping))
  399. (match old-outputs
  400. (((names . files) ...)
  401. files))
  402. (match new-outputs
  403. (((names . files) ...)
  404. files)))
  405. (for-each (lambda (hook)
  406. (hook old-outputs new-outputs mapping
  407. #:log-port log-port))
  408. hooks))
  409. ;;; graft.scm ends here