tarball.scm 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. ;;; Disarchive
  2. ;;; Copyright © 2020 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2020, 2021 Timothy Sample <samplet@ngyro.com>
  4. ;;;
  5. ;;; This file is part of Disarchive.
  6. ;;;
  7. ;;; Disarchive is free software: you can redistribute it and/or modify
  8. ;;; it under the terms of the GNU General Public License as published by
  9. ;;; the Free Software Foundation, either version 3 of the License, or
  10. ;;; (at your option) any later version.
  11. ;;;
  12. ;;; Disarchive is distributed in the hope that it will be useful,
  13. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;;; GNU General Public License for more details.
  16. ;;;
  17. ;;; You should have received a copy of the GNU General Public License
  18. ;;; along with Disarchive. If not, see <http://www.gnu.org/licenses/>.
  19. (define-module (disarchive assemblers tarball)
  20. #:use-module (disarchive assemblers)
  21. #:use-module (disarchive binary-filenames)
  22. #:use-module (disarchive config)
  23. #:use-module (disarchive digests)
  24. #:use-module (disarchive disassemblers)
  25. #:use-module (disarchive kinds binary-string)
  26. #:use-module (disarchive kinds octal)
  27. #:use-module (disarchive kinds tar-header)
  28. #:use-module (disarchive kinds zero-string)
  29. #:use-module (disarchive logging)
  30. #:use-module (disarchive serialization)
  31. #:use-module (disarchive utils)
  32. #:use-module (gcrypt hash)
  33. #:use-module (ice-9 binary-ports)
  34. #:use-module (ice-9 match)
  35. #:use-module (rnrs bytevectors)
  36. #:use-module (srfi srfi-9)
  37. #:export (<tarball>
  38. make-tarball
  39. tarball?
  40. tarball-name
  41. tarball-input
  42. tarball-headers
  43. tarball-padding
  44. tarball-digest
  45. serialize-tarball
  46. serialized-tarball?
  47. deserialize-tarball
  48. tarball-file?
  49. disassemble-tarball
  50. tarball-assembler
  51. tarball-disassembler))
  52. ;;; Commentary:
  53. ;;;
  54. ;;; This module provides procedures for taking apart and reassembling
  55. ;;; tarball files. The idea is to store metadata that allows
  56. ;;; recreating the tarball file bit-for-bit given the original files.
  57. ;;;
  58. ;;; Code:
  59. ;; Data
  60. (define-record-type <tarball>
  61. (make-tarball name input headers padding digest)
  62. tarball?
  63. (name tarball-name) ; string
  64. (input tarball-input) ; blueprint
  65. (headers tarball-headers) ; list of <tar-header>
  66. (padding tarball-padding) ; number or bytevector
  67. (digest tarball-digest)) ; <digest>
  68. (define (tarball-inputs tarball)
  69. (list (tarball-input tarball)))
  70. (define (serialize-tarball tarball)
  71. (match-let* ((($ <tarball> name input headers padding digest) tarball)
  72. (defaults (default-tar-header headers)))
  73. `(tarball
  74. (name ,name)
  75. (digest ,(digest->sexp digest))
  76. (default-header ,@(serialize -tar-header- defaults
  77. %default-default-tar-header))
  78. (headers ,@(map (lambda (header)
  79. (serialize -tar-header- header defaults))
  80. headers))
  81. (padding ,padding)
  82. (input ,(serialize-blueprint input)))))
  83. (define (serialized-tarball? sexp)
  84. (match sexp
  85. (('tarball _ ...) #t)
  86. (_ #f)))
  87. (define (deserialize-tarball sexp)
  88. (match sexp
  89. (('tarball
  90. ('name name)
  91. ('digest digest-sexp)
  92. ('default-header . defaults-sexp)
  93. ('headers . header-sexps)
  94. ('padding padding)
  95. ('input input-sexp))
  96. (make-tarball
  97. name
  98. (deserialize-blueprint input-sexp)
  99. (let ((defaults (deserialize -tar-header- defaults-sexp
  100. %default-default-tar-header)))
  101. (map (lambda (sexp)
  102. (deserialize -tar-header- sexp defaults))
  103. header-sexps))
  104. padding
  105. (sexp->digest digest-sexp)))
  106. (_ #f)))
  107. ;; Assembly
  108. (define (regular-file/fixed? filename)
  109. (define %lstat/fixed
  110. (match filename
  111. ((? string?) lstat/utf8)
  112. ((? bytevector?) lstat/binary)
  113. (_ "Invalid string" filename)))
  114. (and=> (false-if-exception (%lstat/fixed filename))
  115. (lambda (st)
  116. (eq? (stat:type st) 'regular))))
  117. (define* (open-input-file/fixed filename #:key binary?)
  118. (define %open-input-file/fixed
  119. (match filename
  120. ((? string?) open-input-file/utf8)
  121. ((? bytevector?) open-input-file/binary)
  122. (_ "Invalid string" filename)))
  123. (%open-input-file/fixed filename #:binary? binary?))
  124. (define (write-data-padding data-padding size port)
  125. (let* ((remainder (modulo size 512))
  126. (len (if (zero? remainder) 0 (- 512 remainder)))
  127. (bv (make-bytevector len)))
  128. (encode-binary-string data-padding bv)
  129. (put-bytevector port bv)))
  130. (define (assemble-tarball tarball workspace)
  131. (match-let* ((($ <tarball> name input-blueprint
  132. headers padding digest) tarball)
  133. (input-digest (blueprint-digest input-blueprint))
  134. (input (digest->filename input-digest workspace))
  135. (output (digest->filename digest workspace)))
  136. (message "Assembling the tarball ~a" name)
  137. (call-with-output-file output
  138. (lambda (out)
  139. (for-each (lambda (header)
  140. (let* ((path (tar-header-path header))
  141. (size (tar-header-size header))
  142. (source (string-append input "/" path))
  143. (data-padding (tar-header-data-padding header)))
  144. (write-tar-header out header)
  145. (when (and (not (zero? size))
  146. (regular-file/fixed? source))
  147. (let ((in (open-input-file/fixed source
  148. #:binary? #t)))
  149. (dump-port-all in out)
  150. (close-port in))
  151. (write-data-padding data-padding size out))
  152. (unless (or (zero? size)
  153. (regular-file/fixed? source))
  154. (message "WARNING: Ignoring irregular file: ~a"
  155. source))))
  156. headers)
  157. (let ((zeros (make-bytevector 512 0)))
  158. (put-bytevector out zeros)
  159. (put-bytevector out zeros))
  160. (put-bytevector out (if (number? padding)
  161. (make-bytevector padding 0)
  162. padding))))))
  163. ;; Disassembly
  164. (define (tarball-file? filename st)
  165. "Check if FILENAME is a tar file."
  166. (and (eq? (stat:type st) 'regular)
  167. (call-with-input-file filename
  168. (lambda (in)
  169. (define bv (get-bytevector-n in 512))
  170. (and (bytevector? bv)
  171. (= (bytevector-length bv) 512)
  172. (let* ((header (bytevector->tar-header bv))
  173. (name (tar-header-name header))
  174. (expected-chksum (tar-header-chksum header)))
  175. (bytevector-copy! (make-bytevector 8 #x20) 0 bv 148 8)
  176. (let ((actual-chksum
  177. (let lp ((k 0) (sum 0))
  178. (if (< k 512)
  179. (lp (1+ k) (+ sum (bytevector-u8-ref bv k)))
  180. sum))))
  181. (= expected-chksum actual-chksum))))))))
  182. (define (consumer port)
  183. "Return a procedure that consumes or skips the given number of bytes from
  184. PORT."
  185. (if (false-if-exception (seek port 0 SEEK_CUR))
  186. (lambda (len)
  187. (seek port len SEEK_CUR))
  188. (lambda (len)
  189. (define bv (make-bytevector 8192))
  190. (let loop ((len len))
  191. (define block (min len (bytevector-length bv)))
  192. (unless (or (zero? block)
  193. (eof-object? (get-bytevector-n! port bv 0 block)))
  194. (loop (- len block)))))))
  195. (define (read-headers port)
  196. (define skip
  197. (consumer port))
  198. (define (read-data-padding port count)
  199. (let ((padding (get-bytevector-n port count)))
  200. (if (bytevector-zero? padding) "" (decode-binary-string padding))))
  201. (let loop ((result '()))
  202. (define header (read-tar-header port))
  203. (if (end-of-tarball-object? header)
  204. (reverse! result)
  205. (let* ((size (tar-header-size header))
  206. (padding-size (modulo (- 512 (modulo size 512)) 512)))
  207. (if (= (tar-header-typeflag header) (char->integer #\g))
  208. (loop (cons header result))
  209. (begin
  210. (skip size)
  211. (let ((padding (read-data-padding port padding-size)))
  212. (loop (cons (set-tar-header-data-padding header padding)
  213. result)))))))))
  214. (define (read-headers-from-file filename)
  215. (define (read-file-padding port)
  216. (let ((padding (get-bytevector-all port)))
  217. (match padding
  218. ((? eof-object?) 0)
  219. (_ (if (bytevector-zero? padding)
  220. (bytevector-length padding)
  221. (decode-binary-string padding))))))
  222. (call-with-input-file filename
  223. (lambda (port)
  224. (values
  225. (read-headers port)
  226. (read-file-padding port)))))
  227. (define* (disassemble-tarball filename #:optional
  228. (algorithm (hash-algorithm sha256))
  229. #:key (name (basename filename)))
  230. (message "Disassembling the tarball ~a" name)
  231. (call-with-values (lambda () (read-headers-from-file filename))
  232. (lambda (headers padding)
  233. (message "Read ~a headers" (length headers))
  234. (let ((input (call-with-temporary-directory
  235. (lambda (directory)
  236. (message "Extracting the tarball ~a" name)
  237. (invoke %tar "-C" directory "-xf" filename)
  238. (disassemble directory algorithm
  239. #:name (basename name ".tar"))))))
  240. (make-tarball
  241. name
  242. input
  243. headers
  244. padding
  245. (file-digest filename algorithm))))))
  246. ;; Interfaces
  247. (define tarball-assembler
  248. (make-assembler tarball?
  249. tarball-name
  250. tarball-digest
  251. (compose list tarball-input)
  252. serialize-tarball
  253. serialized-tarball?
  254. deserialize-tarball
  255. assemble-tarball))
  256. (define tarball-disassembler
  257. (make-disassembler tarball-file?
  258. disassemble-tarball))