lzss_encoding_hash_table.sf 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. #!/usr/bin/ruby
  2. # Author: Trizen
  3. # Date: 02 May 2024
  4. # Translated: 02 June 2024
  5. # https://github.com/trizen
  6. # Implementation of LZSS encoding, using an hash table.
  7. func lzss_encode (str) {
  8. var la = 0
  9. var chars = str.chars
  10. var end = chars.end
  11. var min_len = 4 # minimum match length
  12. var max_len = 255 # maximum match length
  13. var max_dist = ((1 << 16) - 1) # maximum match distance
  14. var max_chain_len = 16 # how many recent positions to keep track of
  15. var (*literals, *distances, *lengths, :table)
  16. while (la <= end) {
  17. var best_n = 1
  18. var best_p = la
  19. var lookahead = str.substr(la, min_len)
  20. if (table.has(lookahead)) {
  21. for p in (table{lookahead}) {
  22. if (la-p > max_dist) {
  23. break
  24. }
  25. var n = min_len
  26. while ((n <= max_len) && (la+n <= end) && (chars[la + n - 1] == chars[p + n - 1])) {
  27. ++n
  28. }
  29. if (n > best_n) {
  30. best_p = p
  31. best_n = n
  32. }
  33. }
  34. var matched = str.substr(la, best_n)
  35. var i = 0
  36. matched.each_cons(min_len, {|key|
  37. table{key} := [] -> unshift(la + i)
  38. if (table{key}.len > max_chain_len) {
  39. table{key}.pop
  40. }
  41. ++i
  42. })
  43. }
  44. else {
  45. table{lookahead} = [la]
  46. }
  47. if (best_n > min_len) {
  48. lengths << (best_n - 1)
  49. distances << (la - best_p)
  50. literals << nil
  51. la += (best_n - 1)
  52. }
  53. else {
  54. lengths << best_n.of(0)...
  55. distances << best_n.of(0)...
  56. literals << chars[best_p .. (best_p + best_n - 1)]
  57. la += best_n
  58. }
  59. }
  60. return (literals, distances, lengths)
  61. }
  62. func lzss_decode (literals, distances, lengths) {
  63. var data = []
  64. var data_len = 0
  65. for i in (^lengths) {
  66. if (lengths[i] == 0) {
  67. data << literals[i]
  68. data_len += 1
  69. next
  70. }
  71. var length = lengths[i];
  72. var dist = distances[i];
  73. for j in (1 .. length) {
  74. data << data[data_len + j - dist - 1]
  75. }
  76. data_len += length
  77. }
  78. data.join
  79. }
  80. var string = "abbaabbaabaabaaaa"
  81. var (literals, distances, lengths) = lzss_encode(string)
  82. var decoded = lzss_decode(literals, distances, lengths)
  83. assert_eq(string, decoded)
  84. for i in (^literals) {
  85. if (lengths[i] == 0) {
  86. say literals[i]
  87. }
  88. else {
  89. say "[#{distances[i]}, #{lengths[i]}]"
  90. }
  91. }
  92. for file in ([__FILE__, $^PERL]) { # several tests
  93. var string = File(file).read(:raw)
  94. var (literals, distances, lengths) = lzss_encode(string)
  95. var decoded = lzss_decode(literals, distances, lengths)
  96. say("Ratio: ", literals.len / literals.grep{defined(_)}.len)
  97. assert_eq(string, decoded)
  98. }
  99. __END__
  100. a
  101. b
  102. b
  103. a
  104. [4, 6]
  105. [3, 5]
  106. a
  107. a
  108. Ratio: 1.31735751295336787564766839378238341968911917098
  109. Ratio: 1.44651830581478822684852835606604450825556353195