arithmetic_coding_rational.sf 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. #!/usr/bin/ruby
  2. # Author: Trizen
  3. # Date: 12 July 2023
  4. # https://github.com/trizen
  5. # The Arithmetic Coding algorithm, implemented using rational numbers (impractical).
  6. # Reference:
  7. # Data Compression (Summer 2023) - Lecture 14 - Arithmetic Coding
  8. # https://youtube.com/watch?v=xt3uNibQWlQ
  9. func create_cfreq(freq) {
  10. var c_low = Hash()
  11. var c_high = Hash()
  12. var T = 0
  13. var len = freq.values.sum
  14. for i in (0..256) {
  15. freq{i} \\ next
  16. c_low{i} = T/len
  17. T += freq{i}
  18. c_high{i} = T/len
  19. }
  20. return (c_low, c_high, T)
  21. }
  22. func encode (str) {
  23. var bytes = str.bytes+[256]
  24. var freq = bytes.freq
  25. var(c_low, c_high) = create_cfreq(freq)
  26. var low = 0
  27. var high = 1
  28. for c in (bytes) {
  29. var w = (high - low)
  30. high = (low + w*c_high{c})
  31. low = (low + w*c_low{c})
  32. }
  33. return (low, freq)
  34. }
  35. func decode(v, freq) {
  36. var low = 0
  37. var high = 1
  38. var dec = FileHandle.new_buf(:raw)
  39. var(c_low, c_high, T) = create_cfreq(freq)
  40. var table = []
  41. for i in (0..256) {
  42. c_low{i} \\ next
  43. for j in (c_low{i}*T ..^ T*c_high{i}) {
  44. table[j] = i
  45. }
  46. }
  47. loop {
  48. var w = (high - low)
  49. var sv = (v - low)/w
  50. var c = table[sv * T]
  51. break if (c == 256)
  52. dec << c.chr
  53. high = (low + w*c_high{c})
  54. low = (low + w*c_low{c})
  55. }
  56. return dec.parent
  57. }
  58. var str = "ABRACADABRA AND A VERY SAD SALAD"
  59. if (ARGV) {
  60. if (File(ARGV[0]).exists) {
  61. str = File(ARGV[0]).read(:raw)
  62. }
  63. else {
  64. str = ARGV[0]
  65. }
  66. }
  67. var (enc, freq) = encode(str)
  68. say enc
  69. say enc.as_rat
  70. var dec = decode(enc, freq)
  71. assert_eq(str.len, dec.len)
  72. assert_eq(str, dec)
  73. __END__
  74. 0.303688064925694067996504375985214402219198685615
  75. 1452191786545590180270161383342926848824010927665/4781853336583741771837630738320908723679154940019