arithmetic_coding.sf 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. #!/usr/bin/ruby
  2. # Author: Daniel "Trizen" Șuteu
  3. # License: GPLv3
  4. # Date: 01 May 2015
  5. # Website: https://github.com/trizen
  6. #
  7. ## The arithmetic coding algorithm.
  8. #
  9. # See: https://en.wikipedia.org/wiki/Arithmetic_coding#Arithmetic_coding_as_a_generalized_change_of_radix
  10. func cumulative_freq(freq) {
  11. var cf = Hash()
  12. var total = 0
  13. 256.range.each { |b|
  14. if (freq.contains(b)) {
  15. cf{b} = total
  16. total += freq{b}
  17. }
  18. }
  19. return cf
  20. }
  21. func arithmethic_coding(bytes, radix) {
  22. # The frequency characters
  23. var freq = Hash()
  24. bytes.each { |c| freq{c} := 0 ++ }
  25. # The cumulative frequency table
  26. var cf = cumulative_freq(freq)
  27. # Limit and base
  28. var lim = bytes.end
  29. var base = lim+1
  30. # Lower bound
  31. var L = 0
  32. # Product of all frequencies
  33. var pf = 1
  34. # Each term is multiplied by the product of the
  35. # frequencies of all previously occurring symbols
  36. base.range.each { |i|
  37. var x = (cf{bytes[i]} * base**(lim - i))
  38. L += x*pf
  39. pf *= freq{bytes[i]}
  40. }
  41. # Upper bound
  42. var U = L+pf
  43. var pow = pf.log(radix).int
  44. var enc = ((U-1) // radix**pow) #/
  45. return (enc, pow, freq)
  46. }
  47. func arithmethic_decoding(enc, radix, pow, freq) {
  48. # Multiply enc by 10^pow
  49. enc *= radix**pow;
  50. var base = 0
  51. freq.each_value { |v| base += v }
  52. # Create the cumulative frequency table
  53. var cf = cumulative_freq(freq);
  54. # Create the dictionary
  55. var dict = Hash()
  56. cf.each_kv { |k,v|
  57. dict{v} = k
  58. }
  59. # Fill the gaps in the dictionary
  60. var lchar = ''
  61. base.range.each { |i|
  62. if (dict.contains(i)) {
  63. lchar = dict{i}
  64. }
  65. elsif (!lchar.is_empty) {
  66. dict{i} = lchar
  67. }
  68. }
  69. # Decode the input number
  70. var decoded = []
  71. (base-1).downto(0).each { |i|
  72. var pow = base**i;
  73. var div = (enc // pow) #/
  74. var c = dict{div}
  75. var fv = freq{c}
  76. var cv = cf{c}
  77. var rem = ((enc - pow*cv) // fv) #/
  78. enc = rem
  79. decoded << c
  80. }
  81. # Return the decoded output
  82. return decoded
  83. }
  84. #
  85. ## Run some tests
  86. #
  87. const radix = 10 # can be any integer greater or equal with 2
  88. %w(DABDDB DABDDBBDDBA ABBDDD ABRACADABRA CoMpReSSeD Sidef Trizen google TOBEORNOTTOBEORTOBEORNOT 象形字).each { |str|
  89. var (enc, pow, freq) = arithmethic_coding(str.bytes, radix)
  90. var dec = arithmethic_decoding(enc, radix, pow, freq).join_bytes('UTF-8')
  91. say "Encoded: #{enc} * #{radix}^#{pow}";
  92. say "Decoded: #{dec}";
  93. if (str != dec) {
  94. die "\tHowever that is incorrect!"
  95. }
  96. say '-'*80;
  97. }