lzw_file_compression.sf 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. #!/usr/bin/ruby
  2. # Encode and decode small files using LZW compression.
  3. # See also:
  4. # https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
  5. define(
  6. MAGIC_SIGNATURE = ('LZW22' + 1.chr)
  7. )
  8. # Compress a string to a list of output symbols
  9. func compress(String uncompressed) -> Array {
  10. # Build the dictionary
  11. var dict_size = 256
  12. var dictionary = Hash(dict_size.of {|i| (i.chr, i) }...)
  13. var w = ''
  14. var result = []
  15. uncompressed.each { |c|
  16. var wc = w+c
  17. if (dictionary.has(wc)) {
  18. w = wc
  19. } else {
  20. result.append(dictionary{w})
  21. dictionary{wc} = dict_size++
  22. w = c
  23. }
  24. }
  25. # Output the code for w
  26. if (w != '') {
  27. result.append(dictionary{w})
  28. }
  29. return result
  30. }
  31. # Decompress a list of output ks to a string
  32. func decompress(Array compressed) -> String {
  33. # Build the dictionary
  34. var dict_size = 256
  35. var dictionary = dict_size.of { .chr }
  36. var w = compressed.shift.chr
  37. var result = [w]
  38. compressed.each { |k|
  39. var entry = if (k < dict_size) {
  40. dictionary[k]
  41. } elsif (k == dict_size) {
  42. w+w.char(0)
  43. } else {
  44. die "Bad compressed k: #{k}"
  45. }
  46. result.append(entry)
  47. # Add w+entry[0] to the dictionary
  48. dictionary.append(w+entry.char(0))
  49. ++dict_size
  50. w = entry
  51. }
  52. return result.join
  53. }
  54. func encode_integers (Array compressed) {
  55. var counts = []
  56. var count = 0
  57. var bits_per_symbol = 2
  58. var processed_len = 0
  59. for k in (compressed) {
  60. while (k >= bits_per_symbol) {
  61. if (count > 0) {
  62. counts << [bits_per_symbol.len(2)-1, count]
  63. processed_len += count
  64. }
  65. count = 0
  66. bits_per_symbol *= 2
  67. }
  68. ++count
  69. }
  70. counts << [bits_per_symbol.len(2)-1, compressed.len - processed_len]
  71. var clen = counts.len
  72. var header = clen.chr
  73. for b,len in (counts) {
  74. header += chr(b)
  75. header += pack('N', len)
  76. }
  77. var bits = []
  78. for b,len in (counts) {
  79. len > 0 || next
  80. bits << compressed.splice(0, len).map{ sprintf('%0*b', b, _) }...
  81. }
  82. header + pack('B*', bits.join)
  83. }
  84. func decode_integers (FileHandle fh) {
  85. var count_len = fh.getc.ord
  86. var counts = []
  87. count_len.times {
  88. var b = fh.getc.ord
  89. var l = Num(unpack('N', 4.of { fh.getc }.join))
  90. counts << [b, l]
  91. }
  92. var chunks = []
  93. var bits = fh.slurp.ascii2bin
  94. for b,len in (counts) {
  95. len > 0 || next
  96. chunks << bits.substr(0, b*len).slices(b)...
  97. bits.substr!(b*len)
  98. }
  99. chunks.map { Num(_, 2) }
  100. }
  101. # Compress file
  102. func lzw_compress_file(File file) {
  103. var compressed = compress(file.read(:raw))
  104. MAGIC_SIGNATURE + encode_integers(compressed)
  105. }
  106. # Decompress file
  107. func lzw_decompress_file(File file) {
  108. var fh = file.open('<:raw')
  109. if (MAGIC_SIGNATURE.len.of { fh.getc }.join != MAGIC_SIGNATURE) {
  110. die "Not a LZW22 archive!\n"
  111. }
  112. decompress(decode_integers(fh))
  113. }
  114. ARGV.getopt!('d', \var decode)
  115. var file = File(ARGV.shift) || do {
  116. say "usage: #{File(__MAIN__).basename} [-d] [input file]"
  117. Sys.exit(2)
  118. }
  119. if (decode || file.match(/\.lzw\.enc\z/)) {
  120. var decompressed = lzw_decompress_file(file)
  121. File("output.lzw.dec").write(decompressed, :raw)
  122. }
  123. else {
  124. var compressed = lzw_compress_file(file)
  125. File("output.lzw.enc").write(compressed, :raw)
  126. }