lzlib.scm 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2019 Pierre Neidhardt <mail@ambrevar.xyz>
  3. ;;; Copyright © 2019-2020, 2024 Ludovic Courtès <ludo@gnu.org>
  4. ;;; Copyright © 2020 Mathieu Othacehe <othacehe@gnu.org>
  5. ;;;
  6. ;;; This file is part of GNU Guix.
  7. ;;;
  8. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  9. ;;; under the terms of the GNU General Public License as published by
  10. ;;; the Free Software Foundation; either version 3 of the License, or (at
  11. ;;; your option) any later version.
  12. ;;;
  13. ;;; GNU Guix is distributed in the hope that it will be useful, but
  14. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. ;;; GNU General Public License for more details.
  17. ;;;
  18. ;;; You should have received a copy of the GNU General Public License
  19. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  20. (define-module (lzlib)
  21. #:use-module (lzlib config)
  22. #:use-module (rnrs bytevectors)
  23. #:use-module (rnrs arithmetic bitwise)
  24. #:use-module (ice-9 binary-ports)
  25. #:use-module (ice-9 match)
  26. #:use-module (system foreign)
  27. #:use-module (srfi srfi-11)
  28. #:export (make-lzip-input-port
  29. make-lzip-output-port
  30. make-lzip-input-port/compressed
  31. call-with-lzip-input-port
  32. call-with-lzip-output-port
  33. %default-member-length-limit
  34. %default-compression-level
  35. dictionary-size+match-length-limit))
  36. ;;; Commentary:
  37. ;;;
  38. ;;; This file is extracted from Guix and originally written by Pierre Neidhardt
  39. ;;; and Ludovic Courtès.
  40. ;;;
  41. ;;; Bindings to the lzlib / liblz API. Some convenience functions are also
  42. ;;; provided (see the export).
  43. ;;;
  44. ;;; While the bindings are complete, the convenience functions only support
  45. ;;; single member archives. To decompress single member archives, we loop
  46. ;;; until lz-decompress-read returns 0. This is simpler. To support multiple
  47. ;;; members properly, we need (among others) to call lz-decompress-finish and
  48. ;;; loop over lz-decompress-read until lz-decompress-finished? returns #t.
  49. ;;; Otherwise a multi-member archive starting with an empty member would only
  50. ;;; decompress the empty member and stop there, resulting in truncated output.
  51. ;;; Code:
  52. (define %liblz-handle
  53. (delay (dynamic-link %liblz)))
  54. (define register-allocation
  55. ;; Let the GC know that an unmanaged heap allocation took place.
  56. (pointer->procedure void
  57. (dynamic-func "scm_gc_register_allocation"
  58. (dynamic-link))
  59. (list size_t)))
  60. (define (lzlib-procedure ret name parameters)
  61. "Return a procedure corresponding to C function NAME in liblz, or #f if
  62. either lzlib or the function could not be found."
  63. (match (false-if-exception (dynamic-func name (force %liblz-handle)))
  64. ((? pointer? ptr)
  65. (pointer->procedure ret ptr parameters))
  66. (#f
  67. #f)))
  68. (define-wrapped-pointer-type <lz-decoder>
  69. ;; Scheme counterpart of the 'LZ_Decoder' opaque type.
  70. lz-decoder?
  71. pointer->lz-decoder
  72. lz-decoder->pointer
  73. (lambda (obj port)
  74. (format port "#<lz-decoder ~a>"
  75. (number->string (object-address obj) 16))))
  76. (define-wrapped-pointer-type <lz-encoder>
  77. ;; Scheme counterpart of the 'LZ_Encoder' opaque type.
  78. lz-encoder?
  79. pointer->lz-encoder
  80. lz-encoder->pointer
  81. (lambda (obj port)
  82. (format port "#<lz-encoder ~a>"
  83. (number->string (object-address obj) 16))))
  84. ;; From lzlib.h
  85. (define %error-number-ok 0)
  86. (define %error-number-bad-argument 1)
  87. (define %error-number-mem-error 2)
  88. (define %error-number-sequence-error 3)
  89. (define %error-number-header-error 4)
  90. (define %error-number-unexpected-eof 5)
  91. (define %error-number-data-error 6)
  92. (define %error-number-library-error 7)
  93. ;; Compression bindings.
  94. (define %lz-compress-close
  95. (delay (dynamic-func "LZ_compress_close" (force %liblz-handle))))
  96. (define lz-compress-open
  97. (let ((proc (lzlib-procedure '* "LZ_compress_open" (list int int uint64)))
  98. ;; member-size is an "unsigned long long", and the C standard guarantees
  99. ;; a minimum range of 0..2^64-1.
  100. (unlimited-size (- (expt 2 64) 1)))
  101. (lambda* (dictionary-size match-length-limit #:optional (member-size unlimited-size))
  102. "Initialize the internal stream state for compression and returns a
  103. pointer that can only be used as the encoder argument for the other
  104. lz-compress functions, or a null pointer if the encoder could not be
  105. allocated.
  106. See the manual: (lzlib) Compression functions."
  107. (let ((encoder-ptr (proc dictionary-size match-length-limit member-size)))
  108. (if (not (= (lz-compress-error encoder-ptr) -1))
  109. (begin
  110. ;; Since the allocation behind the encoder may be large, notify
  111. ;; the GC so it can be triggered more quickly.
  112. (register-allocation (* 2 dictionary-size))
  113. ;; Register a finalizer for the encoder. The other option
  114. ;; would be to call 'LZ_compress_close' explicitly from the
  115. ;; 'close' method of the output port but that doesn't work
  116. ;; because the 'close' method is not called when the port is
  117. ;; GC'd. Furthermore, 'LZ_compress_close' can only be called
  118. ;; once, so we can't do both.
  119. (set-pointer-finalizer! encoder-ptr (force %lz-compress-close))
  120. (pointer->lz-encoder encoder-ptr))
  121. (throw 'lzlib-error 'lz-compress-open))))))
  122. (define lz-compress-finish
  123. (let ((proc (lzlib-procedure int "LZ_compress_finish" '(*))))
  124. (lambda (encoder)
  125. "Tell that all the data for this member have already been written (with
  126. the `lz-compress-write' function). It is safe to call `lz-compress-finish' as
  127. many times as needed. After all the produced compressed data have been read
  128. with `lz-compress-read' and `lz-compress-member-finished?' returns #t, a new
  129. member can be started with 'lz-compress-restart-member'."
  130. (let ((ret (proc (lz-encoder->pointer encoder))))
  131. (if (= ret -1)
  132. (throw 'lzlib-error 'lz-compress-finish (lz-compress-error encoder))
  133. ret)))))
  134. (define lz-compress-restart-member
  135. (let ((proc (lzlib-procedure int "LZ_compress_restart_member" (list '* uint64))))
  136. (lambda (encoder member-size)
  137. "Start a new member in a multimember data stream.
  138. Call this function only after `lz-compress-member-finished?' indicates that the
  139. current member has been fully read (with the `lz-compress-read' function)."
  140. (let ((ret (proc (lz-encoder->pointer encoder) member-size)))
  141. (if (= ret -1)
  142. (throw 'lzlib-error 'lz-compress-restart-member
  143. (lz-compress-error encoder))
  144. ret)))))
  145. (define lz-compress-sync-flush
  146. (let ((proc (lzlib-procedure int "LZ_compress_sync_flush" (list '*))))
  147. (lambda (encoder)
  148. "Make available to `lz-compress-read' all the data already written with
  149. the `LZ-compress-write' function. First call `lz-compress-sync-flush'. Then
  150. call 'lz-compress-read' until it returns 0.
  151. Repeated use of `LZ-compress-sync-flush' may degrade compression ratio,
  152. so use it only when needed. "
  153. (let ((ret (proc (lz-encoder->pointer encoder))))
  154. (if (= ret -1)
  155. (throw 'lzlib-error 'lz-compress-sync-flush
  156. (lz-compress-error encoder))
  157. ret)))))
  158. (define lz-compress-read
  159. (let ((proc (lzlib-procedure int "LZ_compress_read" (list '* '* int))))
  160. (lambda* (encoder lzfile-bv #:optional (start 0) (count (bytevector-length lzfile-bv)))
  161. "Read up to COUNT bytes from the encoder stream, storing the results in LZFILE-BV.
  162. Return the number of uncompressed bytes written, a positive integer."
  163. (let ((ret (proc (lz-encoder->pointer encoder)
  164. (bytevector->pointer lzfile-bv start)
  165. count)))
  166. (if (= ret -1)
  167. (throw 'lzlib-error 'lz-compress-read (lz-compress-error encoder))
  168. ret)))))
  169. (define lz-compress-write
  170. (let ((proc (lzlib-procedure int "LZ_compress_write" (list '* '* int))))
  171. (lambda* (encoder bv #:optional (start 0) (count (bytevector-length bv)))
  172. "Write up to COUNT bytes from BV to the encoder stream. Return the
  173. number of uncompressed bytes written, a strictly positive integer."
  174. (let ((ret (proc (lz-encoder->pointer encoder)
  175. (bytevector->pointer bv start)
  176. count)))
  177. (if (< ret 0)
  178. (throw 'lzlib-error 'lz-compress-write (lz-compress-error encoder))
  179. ret)))))
  180. (define lz-compress-write-size
  181. (let ((proc (lzlib-procedure int "LZ_compress_write_size" '(*))))
  182. (lambda (encoder)
  183. "The maximum number of bytes that can be immediately written through the
  184. `lz-compress-write' function.
  185. It is guaranteed that an immediate call to `lz-compress-write' will accept a
  186. SIZE up to the returned number of bytes. "
  187. (let ((ret (proc (lz-encoder->pointer encoder))))
  188. (if (= ret -1)
  189. (throw 'lzlib-error 'lz-compress-write-size (lz-compress-error encoder))
  190. ret)))))
  191. (define lz-compress-error
  192. (let ((proc (lzlib-procedure int "LZ_compress_errno" '(*))))
  193. (lambda (encoder)
  194. "ENCODER can be a Scheme object or a pointer."
  195. (let* ((error-number (proc (if (lz-encoder? encoder)
  196. (lz-encoder->pointer encoder)
  197. encoder))))
  198. error-number))))
  199. (define lz-compress-finished?
  200. (let ((proc (lzlib-procedure int "LZ_compress_finished" '(*))))
  201. (lambda (encoder)
  202. "Return #t if all the data have been read, otherwise return #f."
  203. (let ((ret (proc (lz-encoder->pointer encoder))))
  204. (match ret
  205. (1 #t)
  206. (0 #f)
  207. (_ (throw 'lzlib-error 'lz-compress-finished? (lz-compress-error encoder))))))))
  208. (define lz-compress-member-finished?
  209. (let ((proc (lzlib-procedure int "LZ_compress_member_finished" '(*))))
  210. (lambda (encoder)
  211. "Return #t if the current member, in a multimember data stream, has
  212. been fully read and 'lz-compress-restart-member' can be safely called.
  213. Otherwise return #f."
  214. (let ((ret (proc (lz-encoder->pointer encoder))))
  215. (match ret
  216. (1 #t)
  217. (0 #f)
  218. (_ (throw 'lzlib-error 'lz-compress-member-finished? (lz-compress-error encoder))))))))
  219. (define lz-compress-data-position
  220. (let ((proc (lzlib-procedure uint64 "LZ_compress_data_position" '(*))))
  221. (lambda (encoder)
  222. "Return the number of input bytes already compressed in the current
  223. member."
  224. (let ((ret (proc (lz-encoder->pointer encoder))))
  225. (if (= ret -1)
  226. (throw 'lzlib-error 'lz-compress-data-position
  227. (lz-compress-error encoder))
  228. ret)))))
  229. (define lz-compress-member-position
  230. (let ((proc (lzlib-procedure uint64 "LZ_compress_member_position" '(*))))
  231. (lambda (encoder)
  232. "Return the number of compressed bytes already produced, but perhaps
  233. not yet read, in the current member."
  234. (let ((ret (proc (lz-encoder->pointer encoder))))
  235. (if (= ret -1)
  236. (throw 'lzlib-error 'lz-compress-member-position
  237. (lz-compress-error encoder))
  238. ret)))))
  239. (define lz-compress-total-in-size
  240. (let ((proc (lzlib-procedure uint64 "LZ_compress_total_in_size" '(*))))
  241. (lambda (encoder)
  242. "Return the total number of input bytes already compressed."
  243. (let ((ret (proc (lz-encoder->pointer encoder))))
  244. (if (= ret -1)
  245. (throw 'lzlib-error 'lz-compress-total-in-size
  246. (lz-compress-error encoder))
  247. ret)))))
  248. (define lz-compress-total-out-size
  249. (let ((proc (lzlib-procedure uint64 "LZ_compress_total_out_size" '(*))))
  250. (lambda (encoder)
  251. "Return the total number of compressed bytes already produced, but
  252. perhaps not yet read."
  253. (let ((ret (proc (lz-encoder->pointer encoder))))
  254. (if (= ret -1)
  255. (throw 'lzlib-error 'lz-compress-total-out-size
  256. (lz-compress-error encoder))
  257. ret)))))
  258. ;; Decompression bindings.
  259. (define %lz-decompress-close
  260. (delay (dynamic-func "LZ_decompress_close" (force %liblz-handle))))
  261. (define lz-decompress-open
  262. (let ((proc (lzlib-procedure '* "LZ_decompress_open" '())))
  263. (lambda ()
  264. "Initializes the internal stream state for decompression and returns a
  265. pointer that can only be used as the decoder argument for the other
  266. lz-decompress functions, or a null pointer if the decoder could not be
  267. allocated.
  268. See the manual: (lzlib) Decompression functions."
  269. (let ((decoder-ptr (proc)))
  270. (if (not (= (lz-decompress-error decoder-ptr) -1))
  271. (begin
  272. (register-allocation (expt 2 16)) ;XXX: rough estimate
  273. (set-pointer-finalizer! decoder-ptr (force %lz-decompress-close))
  274. (pointer->lz-decoder decoder-ptr))
  275. (throw 'lzlib-error 'lz-decompress-open))))))
  276. (define lz-decompress-finish
  277. (let ((proc (lzlib-procedure int "LZ_decompress_finish" '(*))))
  278. (lambda (decoder)
  279. "Tell that all the data for this stream have already been written (with
  280. the `lz-decompress-write' function). It is safe to call
  281. `lz-decompress-finish' as many times as needed."
  282. (let ((ret (proc (lz-decoder->pointer decoder))))
  283. (if (= ret -1)
  284. (throw 'lzlib-error 'lz-decompress-finish (lz-decompress-error decoder))
  285. ret)))))
  286. (define lz-decompress-reset
  287. (let ((proc (lzlib-procedure int "LZ_decompress_reset" '(*))))
  288. (lambda (decoder)
  289. "Reset the internal state of DECODER as it was just after opening it
  290. with the `lz-decompress-open' function. Data stored in the internal buffers
  291. is discarded. Position counters are set to 0."
  292. (let ((ret (proc (lz-decoder->pointer decoder))))
  293. (if (= ret -1)
  294. (throw 'lzlib-error 'lz-decompress-reset
  295. (lz-decompress-error decoder))
  296. ret)))))
  297. (define lz-decompress-sync-to-member
  298. (let ((proc (lzlib-procedure int "LZ_decompress_sync_to_member" '(*))))
  299. (lambda (decoder)
  300. "Reset the error state of DECODER and enters a search state that lasts
  301. until a new member header (or the end of the stream) is found. After a
  302. successful call to `lz-decompress-sync-to-member', data written with
  303. `lz-decompress-write' will be consumed and 'lz-decompress-read' will return 0
  304. until a header is found.
  305. This function is useful to discard any data preceding the first member, or to
  306. discard the rest of the current member, for example in case of a data
  307. error. If the decoder is already at the beginning of a member, this function
  308. does nothing."
  309. (let ((ret (proc (lz-decoder->pointer decoder))))
  310. (if (= ret -1)
  311. (throw 'lzlib-error 'lz-decompress-sync-to-member
  312. (lz-decompress-error decoder))
  313. ret)))))
  314. (define lz-decompress-read
  315. (let ((proc (lzlib-procedure int "LZ_decompress_read" (list '* '* int))))
  316. (lambda* (decoder file-bv #:optional (start 0) (count (bytevector-length file-bv)))
  317. "Read up to COUNT bytes from the decoder stream, storing the results in FILE-BV.
  318. Return the number of uncompressed bytes written, a non-negative positive integer."
  319. (let ((ret (proc (lz-decoder->pointer decoder)
  320. (bytevector->pointer file-bv start)
  321. count)))
  322. (if (< ret 0)
  323. (throw 'lzlib-error 'lz-decompress-read (lz-decompress-error decoder))
  324. ret)))))
  325. (define lz-decompress-write
  326. (let ((proc (lzlib-procedure int "LZ_decompress_write" (list '* '* int))))
  327. (lambda* (decoder bv #:optional (start 0) (count (bytevector-length bv)))
  328. "Write up to COUNT bytes from BV to the decoder stream. Return the
  329. number of uncompressed bytes written, a non-negative integer."
  330. (let ((ret (proc (lz-decoder->pointer decoder)
  331. (bytevector->pointer bv start)
  332. count)))
  333. (if (< ret 0)
  334. (throw 'lzlib-error 'lz-decompress-write (lz-decompress-error decoder))
  335. ret)))))
  336. (define lz-decompress-write-size
  337. (let ((proc (lzlib-procedure int "LZ_decompress_write_size" '(*))))
  338. (lambda (decoder)
  339. "Return the maximum number of bytes that can be immediately written
  340. through the `lz-decompress-write' function.
  341. It is guaranteed that an immediate call to `lz-decompress-write' will accept a
  342. SIZE up to the returned number of bytes. "
  343. (let ((ret (proc (lz-decoder->pointer decoder))))
  344. (if (= ret -1)
  345. (throw 'lzlib-error 'lz-decompress-write-size (lz-decompress-error decoder))
  346. ret)))))
  347. (define lz-decompress-error
  348. (let ((proc (lzlib-procedure int "LZ_decompress_errno" '(*))))
  349. (lambda (decoder)
  350. "DECODER can be a Scheme object or a pointer."
  351. (let* ((error-number (proc (if (lz-decoder? decoder)
  352. (lz-decoder->pointer decoder)
  353. decoder))))
  354. error-number))))
  355. (define lz-decompress-finished?
  356. (let ((proc (lzlib-procedure int "LZ_decompress_finished" '(*))))
  357. (lambda (decoder)
  358. "Return #t if all the data have been read, otherwise return #f."
  359. (let ((ret (proc (lz-decoder->pointer decoder))))
  360. (match ret
  361. (1 #t)
  362. (0 #f)
  363. (_ (throw 'lzlib-error 'lz-decompress-finished? (lz-decompress-error decoder))))))))
  364. (define lz-decompress-member-finished?
  365. (let ((proc (lzlib-procedure int "LZ_decompress_member_finished" '(*))))
  366. (lambda (decoder)
  367. "Return #t if the current member, in a multimember data stream, has
  368. been fully read and `lz-decompress-restart-member' can be safely called.
  369. Otherwise return #f."
  370. (let ((ret (proc (lz-decoder->pointer decoder))))
  371. (match ret
  372. (1 #t)
  373. (0 #f)
  374. (_ (throw 'lzlib-error 'lz-decompress-member-finished? (lz-decompress-error decoder))))))))
  375. (define lz-decompress-member-version
  376. (let ((proc (lzlib-procedure int "LZ_decompress_member_version" '(*))))
  377. (lambda (decoder)
  378. (let ((ret (proc (lz-decoder->pointer decoder))))
  379. "Return the version of current member from member header."
  380. (if (= ret -1)
  381. (throw 'lzlib-error 'lz-decompress-data-position
  382. (lz-decompress-error decoder))
  383. ret)))))
  384. (define lz-decompress-dictionary-size
  385. (let ((proc (lzlib-procedure int "LZ_decompress_dictionary_size" '(*))))
  386. (lambda (decoder)
  387. (let ((ret (proc (lz-decoder->pointer decoder))))
  388. "Return the dictionary size of current member from member header."
  389. (if (= ret -1)
  390. (throw 'lzlib-error 'lz-decompress-member-position
  391. (lz-decompress-error decoder))
  392. ret)))))
  393. (define lz-decompress-data-crc
  394. (let ((proc (lzlib-procedure unsigned-int "LZ_decompress_data_crc" '(*))))
  395. (lambda (decoder)
  396. (let ((ret (proc (lz-decoder->pointer decoder))))
  397. "Return the 32 bit Cyclic Redundancy Check of the data decompressed
  398. from the current member. The returned value is valid only when
  399. `lz-decompress-member-finished' returns #t. "
  400. (if (= ret -1)
  401. (throw 'lzlib-error 'lz-decompress-member-position
  402. (lz-decompress-error decoder))
  403. ret)))))
  404. (define lz-decompress-data-position
  405. (let ((proc (lzlib-procedure uint64 "LZ_decompress_data_position" '(*))))
  406. (lambda (decoder)
  407. "Return the number of decompressed bytes already produced, but perhaps
  408. not yet read, in the current member."
  409. (let ((ret (proc (lz-decoder->pointer decoder))))
  410. (if (= ret -1)
  411. (throw 'lzlib-error 'lz-decompress-data-position
  412. (lz-decompress-error decoder))
  413. ret)))))
  414. (define lz-decompress-member-position
  415. (let ((proc (lzlib-procedure uint64 "LZ_decompress_member_position" '(*))))
  416. (lambda (decoder)
  417. "Return the number of input bytes already decompressed in the current
  418. member."
  419. (let ((ret (proc (lz-decoder->pointer decoder))))
  420. (if (= ret -1)
  421. (throw 'lzlib-error 'lz-decompress-member-position
  422. (lz-decompress-error decoder))
  423. ret)))))
  424. (define lz-decompress-total-in-size
  425. (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_in_size" '(*))))
  426. (lambda (decoder)
  427. (let ((ret (proc (lz-decoder->pointer decoder))))
  428. "Return the total number of input bytes already compressed."
  429. (if (= ret -1)
  430. (throw 'lzlib-error 'lz-decompress-total-in-size
  431. (lz-decompress-error decoder))
  432. ret)))))
  433. (define lz-decompress-total-out-size
  434. (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_out_size" '(*))))
  435. (lambda (decoder)
  436. (let ((ret (proc (lz-decoder->pointer decoder))))
  437. "Return the total number of compressed bytes already produced, but
  438. perhaps not yet read."
  439. (if (= ret -1)
  440. (throw 'lzlib-error 'lz-decompress-total-out-size
  441. (lz-decompress-error decoder))
  442. ret)))))
  443. ;; High level functions.
  444. (define* (lzread! decoder port bv
  445. #:optional (start 0) (count (bytevector-length bv)))
  446. "Read up to COUNT bytes from PORT into BV at offset START. Return the
  447. number of uncompressed bytes actually read; it is zero if COUNT is zero or if
  448. the end-of-stream has been reached."
  449. (define (feed-decoder! decoder)
  450. ;; Feed DECODER with data read from PORT.
  451. (match (get-bytevector-n port (lz-decompress-write-size decoder))
  452. ((? eof-object? eof) eof)
  453. (bv (lz-decompress-write decoder bv))))
  454. (let loop ((read 0)
  455. (start start))
  456. (cond ((< read count)
  457. (match (lz-decompress-read decoder bv start (- count read))
  458. (0 (cond ((lz-decompress-finished? decoder)
  459. read)
  460. ((eof-object? (feed-decoder! decoder))
  461. (lz-decompress-finish decoder)
  462. (loop read start))
  463. (else ;read again
  464. (loop read start))))
  465. (n (loop (+ read n) (+ start n)))))
  466. (else
  467. read))))
  468. (define (lzwrite! encoder source source-offset source-count
  469. target target-offset target-count)
  470. "Write up to SOURCE-COUNT bytes from SOURCE to ENCODER, and read up to
  471. TARGET-COUNT bytes into TARGET at TARGET-OFFSET. Return two values: the
  472. number of bytes read from SOURCE, and the number of bytes written to TARGET,
  473. possibly zero."
  474. (define read
  475. (if (> (lz-compress-write-size encoder) 0)
  476. (match (lz-compress-write encoder source source-offset source-count)
  477. (0 (lz-compress-finish encoder) 0)
  478. (n n))
  479. 0))
  480. (define written
  481. (lz-compress-read encoder target target-offset target-count))
  482. (values read written))
  483. (define* (lzwrite encoder bv lz-port
  484. #:optional (start 0) (count (bytevector-length bv)))
  485. "Write up to COUNT bytes from BV at offset START into LZ-PORT. Return
  486. the number of uncompressed bytes written, a non-negative integer."
  487. (let ((written 0)
  488. (read 0))
  489. (while (and (< 0 (lz-compress-write-size encoder))
  490. (< written count))
  491. (set! written (+ written
  492. (lz-compress-write encoder bv (+ start written) (- count written)))))
  493. (when (= written 0)
  494. (lz-compress-finish encoder))
  495. (let ((lz-bv (make-bytevector written)))
  496. (let loop ((rd 0))
  497. (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
  498. (put-bytevector lz-port lz-bv 0 rd)
  499. (set! read (+ read rd))
  500. (unless (= rd 0)
  501. (loop rd))))
  502. ;; `written' is the total byte count of uncompressed data.
  503. written))
  504. ;;;
  505. ;;; Port interface.
  506. ;;;
  507. ;; Alist of (levels (dictionary-size match-length-limit)). 0 is the fastest.
  508. ;; See bbexample.c in lzlib's source.
  509. (define %compression-levels
  510. `((0 65535 16)
  511. (1 ,(bitwise-arithmetic-shift-left 1 20) 5)
  512. (2 ,(bitwise-arithmetic-shift-left 3 19) 6)
  513. (3 ,(bitwise-arithmetic-shift-left 1 21) 8)
  514. (4 ,(bitwise-arithmetic-shift-left 3 20) 12)
  515. (5 ,(bitwise-arithmetic-shift-left 1 22) 20)
  516. (6 ,(bitwise-arithmetic-shift-left 1 23) 36)
  517. (7 ,(bitwise-arithmetic-shift-left 1 24) 68)
  518. (8 ,(bitwise-arithmetic-shift-left 3 23) 132)
  519. (9 ,(bitwise-arithmetic-shift-left 1 25) 273)))
  520. (define %default-compression-level
  521. 6)
  522. (define (dictionary-size+match-length-limit level)
  523. "Return two values: the dictionary size for LEVEL, and its match-length
  524. limit. LEVEL must be a compression level, an integer between 0 and 9."
  525. (match (assv-ref %compression-levels level)
  526. ((dictionary-size match-length-limit)
  527. (values dictionary-size match-length-limit))))
  528. (define* (make-lzip-input-port port)
  529. "Return an input port that decompresses data read from PORT, a file port.
  530. PORT is automatically closed when the resulting port is closed."
  531. (define decoder (lz-decompress-open))
  532. (define (read! bv start count)
  533. (lzread! decoder port bv start count))
  534. (make-custom-binary-input-port "lzip-input" read! #f #f
  535. (lambda ()
  536. ;; DECODER will be freed by its pointer
  537. ;; finalizer.
  538. (close-port port))))
  539. (define* (make-lzip-output-port port
  540. #:key
  541. (level %default-compression-level))
  542. "Return an output port that compresses data at the given LEVEL, using PORT,
  543. a file port, as its sink. PORT is automatically closed when the resulting
  544. port is closed."
  545. (define encoder
  546. (call-with-values (lambda () (dictionary-size+match-length-limit level))
  547. lz-compress-open))
  548. (define (write! bv start count)
  549. (lzwrite encoder bv port start count))
  550. (make-custom-binary-output-port "lzip-output" write! #f #f
  551. (lambda ()
  552. (lz-compress-finish encoder)
  553. ;; "lz-read" the trailing metadata added by `lz-compress-finish'.
  554. (let ((lz-bv (make-bytevector (* 64 1024))))
  555. (let loop ((rd 0))
  556. (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
  557. (put-bytevector port lz-bv 0 rd)
  558. (unless (= rd 0)
  559. (loop rd))))
  560. ;; ENCODER will be freed by its pointer
  561. ;; finalizer.
  562. (close-port port))))
  563. (define* (make-lzip-input-port/compressed port
  564. #:key
  565. (level %default-compression-level))
  566. "Return an input port that compresses data read from PORT, with the given LEVEL.
  567. PORT is automatically closed when the resulting port is closed."
  568. (define encoder
  569. (call-with-values (lambda () (dictionary-size+match-length-limit level))
  570. lz-compress-open))
  571. (define input-buffer (make-bytevector 8192))
  572. (define input-len 0)
  573. (define input-offset 0)
  574. (define input-eof? #f)
  575. (define (read! bv start count)
  576. (cond
  577. (input-eof?
  578. (match (lz-compress-read encoder bv start count)
  579. (0 (if (lz-compress-finished? encoder)
  580. 0
  581. (read! bv start count)))
  582. (n n)))
  583. ((= input-offset input-len)
  584. (match (get-bytevector-n! port input-buffer 0
  585. (bytevector-length input-buffer))
  586. ((? eof-object?)
  587. (set! input-eof? #t)
  588. (lz-compress-finish encoder))
  589. (count
  590. (set! input-offset 0)
  591. (set! input-len count)))
  592. (read! bv start count))
  593. (else
  594. (let-values (((read written)
  595. (lzwrite! encoder
  596. input-buffer input-offset
  597. (- input-len input-offset)
  598. bv start count)))
  599. (set! input-offset (+ input-offset read))
  600. ;; Make sure we don't return zero except on EOF.
  601. (if (= 0 written)
  602. (read! bv start count)
  603. written)))))
  604. (make-custom-binary-input-port "lzip-input/compressed"
  605. read! #f #f
  606. (lambda ()
  607. (close-port port))))
  608. (define* (call-with-lzip-input-port port proc)
  609. "Call PROC with a port that wraps PORT and decompresses data read from it.
  610. PORT is closed upon completion."
  611. (let ((lzip (make-lzip-input-port port)))
  612. (dynamic-wind
  613. (const #t)
  614. (lambda ()
  615. (proc lzip))
  616. (lambda ()
  617. (close-port lzip)))))
  618. (define* (call-with-lzip-output-port port proc
  619. #:key
  620. (level %default-compression-level))
  621. "Call PROC with an output port that wraps PORT and compresses data. PORT is
  622. close upon completion."
  623. (let ((lzip (make-lzip-output-port port
  624. #:level level)))
  625. (dynamic-wind
  626. (const #t)
  627. (lambda ()
  628. (proc lzip))
  629. (lambda ()
  630. (close-port lzip)))))
  631. ;;; lzlib.scm ends here