adaptive_huffman_coding_with_escape_symbols.sf 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. #!/usr/bin/ruby
  2. # Implementation of the Adaptive Huffman Coding with escape symbols.
  3. # See also:
  4. # https://rosettacode.org/wiki/huffman_coding
  5. # Reference:
  6. # Data Compression (Summer 2023) - Lecture 16 - Adaptive Methods
  7. # https://youtube.com/watch?v=YKv-w8bXi9c
  8. define(
  9. ESCAPE_SYMBOL => 256, # escape symbol
  10. )
  11. func walk(Hash n, String s, Hash h) {
  12. if (n.has(:a)) {
  13. h{n{:a}} = s
  14. return nil
  15. }
  16. walk(n{:0}, s+'0', h) if n.has(:0)
  17. walk(n{:1}, s+'1', h) if n.has(:1)
  18. }
  19. func mktree_from_freq(Hash freqs) {
  20. var nodes = freqs.sort.map_2d { |b,f|
  21. Hash(a => b, freq => f)
  22. }
  23. loop {
  24. nodes.sort_by!{|h| h{:freq} }
  25. var(x, y) = (nodes.shift, nodes.shift)
  26. if (defined(x)) {
  27. if (defined(y)) {
  28. nodes << Hash(:0 => x, :1 => y, :freq => x{:freq}+y{:freq})
  29. }
  30. else {
  31. nodes << Hash(:0 => x, :freq => x{:freq})
  32. }
  33. }
  34. nodes.len > 1 || break
  35. }
  36. var n = nodes.first
  37. walk(n, '', n{:tree} = Hash())
  38. return n
  39. }
  40. func encode(Array bytes, Array alphabet) {
  41. var freq = alphabet.freq
  42. var freq2 = [ESCAPE_SYMBOL].freq
  43. var enc = []
  44. var tree = mktree_from_freq(freq)
  45. bytes.each {|b|
  46. var tree2 = mktree_from_freq(freq2)
  47. if (freq2.has(b)) {
  48. enc << tree2{:tree}{b}
  49. ++freq2{b}
  50. }
  51. else {
  52. freq2{b} = 1
  53. enc << tree2{:tree}{ESCAPE_SYMBOL}
  54. enc << tree{:tree}{b}
  55. }
  56. }
  57. return enc.join
  58. }
  59. func decode(String enc, Array alphabet) {
  60. var out = []
  61. var prefix = ''
  62. var freq = alphabet.freq
  63. var freq2 = [ESCAPE_SYMBOL].freq
  64. var tree = mktree_from_freq(freq){:tree}.flip
  65. var tree2 = mktree_from_freq(freq2){:tree}.flip
  66. var escape = false
  67. enc.each {|bit|
  68. prefix += bit
  69. if (!escape && tree2.has(prefix)) {
  70. var b = Num(tree2{prefix})
  71. if (b == ESCAPE_SYMBOL) {
  72. escape = true
  73. }
  74. else {
  75. out << b
  76. ++freq2{b}
  77. tree2 = mktree_from_freq(freq2){:tree}.flip
  78. }
  79. prefix = ''
  80. }
  81. elsif (escape && tree.has(prefix)) {
  82. var b = tree{prefix}
  83. out << b
  84. freq2{b} = 1
  85. tree2 = mktree_from_freq(freq2){:tree}.flip
  86. prefix = ''
  87. escape = false
  88. }
  89. }
  90. return out
  91. }
  92. var text = "this is an example for huffman encoding"
  93. var bytes = text.bytes
  94. say var enc = encode(bytes, bytes.uniq)
  95. say var dec = decode(enc, bytes.uniq).pack('C*')
  96. assert_eq(dec, text)