delta_encoding_with_double-elias_coding.sf 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. #!/usr/bin/ruby
  2. # Author: Trizen
  3. # Date: 20 June 2023
  4. # https://github.com/trizen
  5. # Implementation of the Delta encoding scheme, optimized for large deltas, using Elias coding.
  6. # Reference:
  7. # Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
  8. # https://youtube.com/watch?v=-3H_eDbWNEU
  9. func read_bit (FileHandle fh, Ref bitstring) {
  10. if ((*bitstring \\ '').is_empty) {
  11. *bitstring = unpack('b*', fh.getc \\ die "error")
  12. }
  13. var bit = (*bitstring).substr(-1)
  14. *bitstring = (*bitstring).substr(0, -1)
  15. return bit
  16. }
  17. func delta_encode (Array integers, Bool double = false) {
  18. var deltas = [0, integers.len, integers...].diffs
  19. var bitstring = FileHandle.new_buf(:raw)
  20. for d in (deltas) {
  21. if (d == 0) {
  22. bitstring << '0'
  23. }
  24. elsif (double) {
  25. var t = d.abs.inc.as_bin
  26. var l = t.len.as_bin
  27. bitstring << join('', '1', ((d < 0) ? '0' : '1'), ('1' * (l.len-1)), '0', l.substr(1), t.substr(1))
  28. }
  29. else {
  30. var t = d.abs.as_bin
  31. bitstring << join('', '1', ((d < 0) ? '0' : '1'), ('1' * (t.len-1)), '0', t.substr(1))
  32. }
  33. }
  34. pack('B*', bitstring.parent)
  35. }
  36. func delta_decode (FileHandle fh, Bool double = false) {
  37. var deltas = []
  38. var buffer = ''
  39. var len = 0
  40. for (var k = 0 ; k <= len ; ++k) {
  41. var bit = read_bit(fh, \buffer)
  42. if (bit == '0') {
  43. deltas << 0
  44. }
  45. elsif (double) {
  46. var bit = read_bit(fh, \buffer)
  47. var bl = (^Inf -> first { read_bit(fh, \buffer) != '1' })
  48. var bl2 = Num('1' + bl.of { read_bit(fh, \buffer) }.join, 2)
  49. var int = Num('1' + (bl2-1).of { read_bit(fh, \buffer) }.join, 2)-1
  50. deltas << (bit == '1' ? int : -int)
  51. }
  52. else {
  53. var bit = read_bit(fh, \buffer)
  54. var n = (^Inf -> first { read_bit(fh, \buffer) != '1' })
  55. var d = Num('1' + n.of { read_bit(fh, \buffer) }.join, 2)
  56. deltas << (bit == '1' ? d : -d)
  57. }
  58. if (k == 0) {
  59. len = deltas.pop
  60. }
  61. }
  62. var acc = [len, deltas...].acc
  63. acc.shift
  64. return acc
  65. }
  66. var str = "TOBEORNOTTOBEORTOBEORNOT"
  67. var enc = delta_encode(str.bytes, true)
  68. var dec = delta_decode(enc.open_r(:raw), true)
  69. say "Encoded: #{unpack('B*', enc)}"
  70. say "Decoded: #{dec.pack('C*')}"
  71. assert_eq(dec.pack('C*'), str)
  72. do {
  73. var str = File(__FILE__).read(:raw)
  74. var encoded = delta_encode(str.bytes)
  75. var decoded = delta_decode(encoded.open_r(:raw))
  76. assert_eq(str, decoded.pack('C*'))
  77. }
  78. __END__
  79. Encoded: 11110101000111101111100101100001101100110111101111110010101110111011000001110011110000101011000011011001101111011111100101011101111101010110000110110011011110111111001010111011101100000111001111000010
  80. Decoded: TOBEORNOTTOBEORTOBEORNOT