123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- #!/usr/bin/ruby
- # Encode and decode small files using LZW compression.
- # See also:
- # https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
- define(
- MAGIC_SIGNATURE = ('LZW22' + 1.chr)
- )
- # Compress a string to a list of output symbols
- func compress(String uncompressed) -> Array {
- # Build the dictionary
- var dict_size = 256
- var dictionary = Hash(dict_size.of {|i| (i.chr, i) }...)
- var w = ''
- var result = []
- uncompressed.each { |c|
- var wc = w+c
- if (dictionary.has(wc)) {
- w = wc
- } else {
- result.append(dictionary{w})
- dictionary{wc} = dict_size++
- w = c
- }
- }
- # Output the code for w
- if (w != '') {
- result.append(dictionary{w})
- }
- return result
- }
- # Decompress a list of output ks to a string
- func decompress(Array compressed) -> String {
- # Build the dictionary
- var dict_size = 256
- var dictionary = dict_size.of { .chr }
- var w = compressed.shift.chr
- var result = [w]
- compressed.each { |k|
- var entry = if (k < dict_size) {
- dictionary[k]
- } elsif (k == dict_size) {
- w+w.char(0)
- } else {
- die "Bad compressed k: #{k}"
- }
- result.append(entry)
- # Add w+entry[0] to the dictionary
- dictionary.append(w+entry.char(0))
- ++dict_size
- w = entry
- }
- return result.join
- }
- func encode_integers (Array compressed) {
- var counts = []
- var count = 0
- var bits_per_symbol = 2
- var processed_len = 0
- for k in (compressed) {
- while (k >= bits_per_symbol) {
- if (count > 0) {
- counts << [bits_per_symbol.len(2)-1, count]
- processed_len += count
- }
- count = 0
- bits_per_symbol *= 2
- }
- ++count
- }
- counts << [bits_per_symbol.len(2)-1, compressed.len - processed_len]
- var clen = counts.len
- var header = clen.chr
- for b,len in (counts) {
- header += chr(b)
- header += pack('N', len)
- }
- var bits = []
- for b,len in (counts) {
- len > 0 || next
- bits << compressed.splice(0, len).map{ sprintf('%0*b', b, _) }...
- }
- header + pack('B*', bits.join)
- }
- func decode_integers (FileHandle fh) {
- var count_len = fh.getc.ord
- var counts = []
- count_len.times {
- var b = fh.getc.ord
- var l = Num(unpack('N', 4.of { fh.getc }.join))
- counts << [b, l]
- }
- var chunks = []
- var bits = fh.slurp.ascii2bin
- for b,len in (counts) {
- len > 0 || next
- chunks << bits.substr(0, b*len).slices(b)...
- bits.substr!(b*len)
- }
- chunks.map { Num(_, 2) }
- }
- # Compress file
- func lzw_compress_file(File file) {
- var compressed = compress(file.read(:raw))
- MAGIC_SIGNATURE + encode_integers(compressed)
- }
- # Decompress file
- func lzw_decompress_file(File file) {
- var fh = file.open('<:raw')
- if (MAGIC_SIGNATURE.len.of { fh.getc }.join != MAGIC_SIGNATURE) {
- die "Not a LZW22 archive!\n"
- }
- decompress(decode_integers(fh))
- }
- ARGV.getopt!('d', \var decode)
- var file = File(ARGV.shift) || do {
- say "usage: #{File(__MAIN__).basename} [-d] [input file]"
- Sys.exit(2)
- }
- if (decode || file.match(/\.lzw\.enc\z/)) {
- var decompressed = lzw_decompress_file(file)
- File("output.lzw.dec").write(decompressed, :raw)
- }
- else {
- var compressed = lzw_compress_file(file)
- File("output.lzw.enc").write(compressed, :raw)
- }
|