deduplication.scm 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
  3. ;;; Copyright © 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
  4. ;;;
  5. ;;; This file is part of GNU Guix.
  6. ;;;
  7. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  8. ;;; under the terms of the GNU General Public License as published by
  9. ;;; the Free Software Foundation; either version 3 of the License, or (at
  10. ;;; your option) any later version.
  11. ;;;
  12. ;;; GNU Guix is distributed in the hope that it will be useful, but
  13. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;;; GNU General Public License for more details.
  16. ;;;
  17. ;;; You should have received a copy of the GNU General Public License
  18. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  19. ;;; This houses stuff we do to files when they arrive at the store - resetting
  20. ;;; timestamps, deduplicating, etc.
  21. (define-module (guix store deduplication)
  22. #:use-module (gcrypt hash)
  23. #:use-module (guix build utils)
  24. #:use-module (guix build syscalls)
  25. #:use-module (guix base32)
  26. #:use-module (srfi srfi-11)
  27. #:use-module (rnrs io ports)
  28. #:use-module (ice-9 ftw)
  29. #:use-module (ice-9 match)
  30. #:use-module (guix serialization)
  31. #:export (nar-sha256
  32. deduplicate))
;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
;; 'port-position' throws to 'out-of-range' when the offset is greater than or
;; equal to 2^32: <https://bugs.gnu.org/32161>.
  36. (define (counting-wrapper-port output-port)
  37. "Return two values: an output port that wraps OUTPUT-PORT, and a thunk to
  38. retrieve the number of bytes written to OUTPUT-PORT."
  39. (let ((byte-count 0))
  40. (values (make-custom-binary-output-port "counting-wrapper"
  41. (lambda (bytes offset count)
  42. (put-bytevector output-port bytes
  43. offset count)
  44. (set! byte-count
  45. (+ byte-count count))
  46. count)
  47. (lambda ()
  48. byte-count)
  49. #f
  50. (lambda ()
  51. (close-port output-port)))
  52. (lambda ()
  53. byte-count))))
  54. (define (nar-sha256 file)
  55. "Gives the sha256 hash of a file and the size of the file in nar form."
  56. (let*-values (((port get-hash) (open-sha256-port))
  57. ((wrapper get-size) (counting-wrapper-port port)))
  58. (write-file file wrapper)
  59. (force-output wrapper)
  60. (force-output port)
  61. (let ((hash (get-hash))
  62. (size (get-size)))
  63. (close-port wrapper)
  64. (values hash size))))
  65. (define (tempname-in directory)
  66. "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
  67. unused by the time you create anything with that name, but a good shot."
  68. (let ((const-part (string-append directory "/.tmp-link-"
  69. (number->string (getpid)))))
  70. (let try ((guess-part
  71. (number->string (random most-positive-fixnum) 16)))
  72. (if (file-exists? (string-append const-part "-" guess-part))
  73. (try (number->string (random most-positive-fixnum) 16))
  74. (string-append const-part "-" guess-part)))))
  75. (define* (get-temp-link target #:optional (link-prefix (dirname target)))
  76. "Like mkstemp!, but instead of creating a new file and giving you the name,
  77. it creates a new hardlink to TARGET and gives you the name. Since
  78. cross-file-system hardlinks don't work, the temp link must be created on the
  79. same file system - where in that file system it is can be controlled by
  80. LINK-PREFIX."
  81. (let try ((tempname (tempname-in link-prefix)))
  82. (catch 'system-error
  83. (lambda ()
  84. (link target tempname)
  85. tempname)
  86. (lambda args
  87. (if (= (system-error-errno args) EEXIST)
  88. (try (tempname-in link-prefix))
  89. (apply throw args))))))
  90. (define (call-with-writable-file file store thunk)
  91. (if (string=? file store)
  92. (thunk) ;don't meddle with the store's permissions
  93. (let ((stat (lstat file)))
  94. (dynamic-wind
  95. (lambda ()
  96. (make-file-writable file))
  97. thunk
  98. (lambda ()
  99. (set-file-time file stat)
  100. (chmod file (stat:mode stat)))))))
  101. (define-syntax-rule (with-writable-file file store exp ...)
  102. "Make FILE writable for the dynamic extent of EXP..., except if FILE is the
  103. store."
  104. (call-with-writable-file file store (lambda () exp ...)))
  105. ;; There are 3 main kinds of errors we can get from hardlinking: "Too many
  106. ;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
  107. ;; "can't fit more stuff in this directory" (ENOSPC).
  108. (define* (replace-with-link target to-replace
  109. #:key (swap-directory (dirname target))
  110. (store (%store-directory)))
  111. "Atomically replace the file TO-REPLACE with a link to TARGET. Use
  112. SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC
  113. and EMLINK, TO-REPLACE is left unchanged.
  114. Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
  115. (define temp-link
  116. (catch 'system-error
  117. (lambda ()
  118. (get-temp-link target swap-directory))
  119. (lambda args
  120. ;; We get ENOSPC when we can't fit an additional entry in
  121. ;; SWAP-DIRECTORY. If it's EMLINK, then TARGET has reached its
  122. ;; maximum number of links.
  123. (if (memv (system-error-errno args) `(,ENOSPC ,EMLINK))
  124. #f
  125. (apply throw args)))))
  126. ;; If we couldn't create TEMP-LINK, that's OK: just don't do the
  127. ;; replacement, which means TO-REPLACE won't be deduplicated.
  128. (when temp-link
  129. (with-writable-file (dirname to-replace) store
  130. (catch 'system-error
  131. (lambda ()
  132. (rename-file temp-link to-replace))
  133. (lambda args
  134. (delete-file temp-link)
  135. (unless (= EMLINK (system-error-errno args))
  136. (apply throw args)))))))
  137. (define* (deduplicate path hash #:key (store (%store-directory)))
  138. "Check if a store item with sha256 hash HASH already exists. If so,
  139. replace PATH with a hardlink to the already-existing one. If not, register
  140. PATH so that future duplicates can hardlink to it. PATH is assumed to be
  141. under STORE."
  142. (define links-directory
  143. (string-append store "/.links"))
  144. (mkdir-p links-directory)
  145. (let loop ((path path)
  146. (type (stat:type (lstat path)))
  147. (hash hash))
  148. (if (eq? 'directory type)
  149. ;; Can't hardlink directories, so hardlink their atoms.
  150. (for-each (match-lambda
  151. ((file . properties)
  152. (unless (member file '("." ".."))
  153. (let* ((file (string-append path "/" file))
  154. (type (match (assoc-ref properties 'type)
  155. ((or 'unknown #f)
  156. (stat:type (lstat file)))
  157. (type type))))
  158. (loop file type
  159. (and (not (eq? 'directory type))
  160. (nar-sha256 file)))))))
  161. (scandir* path))
  162. (let ((link-file (string-append links-directory "/"
  163. (bytevector->nix-base32-string hash))))
  164. (if (file-exists? link-file)
  165. (replace-with-link link-file path
  166. #:swap-directory links-directory
  167. #:store store)
  168. (catch 'system-error
  169. (lambda ()
  170. (link path link-file))
  171. (lambda args
  172. (let ((errno (system-error-errno args)))
  173. (cond ((= errno EEXIST)
  174. ;; Someone else put an entry for PATH in
  175. ;; LINKS-DIRECTORY before we could. Let's use it.
  176. (replace-with-link path link-file
  177. #:swap-directory
  178. links-directory
  179. #:store store))
  180. ((= errno ENOSPC)
  181. ;; There's not enough room in the directory index for
  182. ;; more entries in .links, but that's fine: we can
  183. ;; just stop.
  184. #f)
  185. ((= errno EMLINK)
  186. ;; PATH has reached the maximum number of links, but
  187. ;; that's OK: we just can't deduplicate it more.
  188. #f)
  189. (else (apply throw args)))))))))))