arithmetic_coding.sf 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. #!/usr/bin/ruby
  2. # Author: Daniel "Trizen" Șuteu
  3. # License: GPLv3
  4. # Date: 01 May 2015
  5. # Website: https://github.com/trizen
  6. #
  7. ## The arithmetic coding algorithm.
  8. #
  9. # See also:
  10. # https://en.wikipedia.org/wiki/Arithmetic_coding#Arithmetic_coding_as_a_generalized_change_of_radix
  11. func cumulative_freq(freq) {
  12. var cf = Hash()
  13. var total = 0
  14. 256.range.each { |b|
  15. if (freq.contains(b)) {
  16. cf{b} = total
  17. total += freq{b}
  18. }
  19. }
  20. return cf
  21. }
  22. func arithmethic_coding(bytes, radix) {
  23. # The frequency characters
  24. var freq = Hash()
  25. bytes.each { |c| freq{c} := 0 ++ }
  26. # The cumulative frequency table
  27. var cf = cumulative_freq(freq)
  28. # Limit and base
  29. var lim = bytes.end
  30. var base = lim+1
  31. # Lower bound
  32. var L = 0
  33. # Product of all frequencies
  34. var pf = 1
  35. # Each term is multiplied by the product of the
  36. # frequencies of all previously occurring symbols
  37. base.range.each { |i|
  38. var x = (cf{bytes[i]} * base**(lim - i))
  39. L += x*pf
  40. pf *= freq{bytes[i]}
  41. }
  42. # Upper bound
  43. var U = L+pf
  44. var pow = pf.log(radix).int
  45. var enc = ((U-1) // radix**pow) #/
  46. return (enc, pow, freq)
  47. }
  48. func arithmethic_decoding(enc, radix, pow, freq) {
  49. # Multiply enc by 10^pow
  50. enc *= radix**pow;
  51. var base = 0
  52. freq.each_value { |v| base += v }
  53. # Create the cumulative frequency table
  54. var cf = cumulative_freq(freq);
  55. # Create the dictionary
  56. var dict = Hash()
  57. cf.each_kv { |k,v|
  58. dict{v} = k
  59. }
  60. # Fill the gaps in the dictionary
  61. var lchar = ''
  62. base.range.each { |i|
  63. if (dict.contains(i)) {
  64. lchar = dict{i}
  65. }
  66. elsif (!lchar.is_empty) {
  67. dict{i} = lchar
  68. }
  69. }
  70. # Decode the input number
  71. var decoded = []
  72. (base-1).downto(0).each { |i|
  73. var pow = base**i;
  74. var div = (enc // pow) #/
  75. var c = dict{div}
  76. var fv = freq{c}
  77. var cv = cf{c}
  78. var rem = ((enc - pow*cv) // fv) #/
  79. enc = rem
  80. decoded << c
  81. }
  82. # Return the decoded output
  83. return decoded
  84. }
  85. #
  86. ## Run some tests
  87. #
  88. const radix = 10 # can be any integer greater or equal with 2
  89. %w(DABDDB DABDDBBDDBA ABBDDD ABRACADABRA CoMpReSSeD Sidef Trizen google TOBEORNOTTOBEORTOBEORNOT 象形字).each { |str|
  90. var (enc, pow, freq) = arithmethic_coding(str.bytes, radix)
  91. var dec = arithmethic_decoding(enc, radix, pow, freq).join_bytes('UTF-8')
  92. say "Encoded: #{enc} * #{radix}^#{pow}";
  93. say "Decoded: #{dec}";
  94. if (str != dec) {
  95. die "\tHowever that is incorrect!"
  96. }
  97. say '-'*80;
  98. }