lzss_encoding_hash_table_fast.sf 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. #!/usr/bin/ruby
  2. # Author: Trizen
  3. # Date: 02 May 2024
  4. # Translated: 02 June 2024
  5. # https://github.com/trizen
  6. # Implementation of LZSS encoding, using an hash table.
  7. # A non-optimal, but very fast approach.
  8. func lzss_encode (str) {
  9. var la = 0
  10. var chars = str.chars
  11. var end = chars.end
  12. var min_len = 4 # minimum match length
  13. var max_len = 255 # maximum match length
  14. var max_dist = ((1 << 16) - 1) # maximum match distance
  15. var (*literals, *distances, *lengths, :table)
  16. while (la <= end) {
  17. var best_n = 1
  18. var best_p = la
  19. var lookahead = str.substr(la, min_len)
  20. if (table.has(lookahead) && (la - table{lookahead} <= max_dist)) {
  21. var p = table{lookahead}
  22. var n = min_len
  23. while ((n <= max_len) && (la+n <= end) && (chars[la + n - 1] == chars[p + n - 1])) {
  24. ++n
  25. }
  26. best_p = p
  27. best_n = n
  28. table{lookahead} = la
  29. }
  30. else {
  31. table{lookahead} = la
  32. }
  33. if (best_n > min_len) {
  34. lengths << (best_n - 1)
  35. distances << (la - best_p)
  36. literals << nil
  37. la += (best_n - 1)
  38. }
  39. else {
  40. lengths << best_n.of(0)...
  41. distances << best_n.of(0)...
  42. literals << chars[best_p .. (best_p + best_n - 1)]
  43. la += best_n
  44. }
  45. }
  46. return (literals, distances, lengths)
  47. }
  48. func lzss_decode (literals, distances, lengths) {
  49. var data = []
  50. var data_len = 0
  51. for i in (^lengths) {
  52. if (lengths[i] == 0) {
  53. data << literals[i]
  54. data_len += 1
  55. next
  56. }
  57. var length = lengths[i];
  58. var dist = distances[i];
  59. for j in (1 .. length) {
  60. data << data[data_len + j - dist - 1]
  61. }
  62. data_len += length
  63. }
  64. data.join
  65. }
  66. var string = "abbaabbaabaabaaaa"
  67. var (literals, distances, lengths) = lzss_encode(string)
  68. var decoded = lzss_decode(literals, distances, lengths)
  69. assert_eq(string, decoded)
  70. for i in (^literals) {
  71. if (lengths[i] == 0) {
  72. say literals[i]
  73. }
  74. else {
  75. say "[#{distances[i]}, #{lengths[i]}]"
  76. }
  77. }
  78. for file in ([__FILE__, $^PERL]) { # several tests
  79. var string = File(file).read(:raw)
  80. var (literals, distances, lengths) = lzss_encode(string)
  81. var decoded = lzss_decode(literals, distances, lengths)
  82. say("Ratio: ", literals.len / literals.grep{defined(_)}.len)
  83. assert_eq(string, decoded)
  84. }
  85. __END__
  86. a
  87. b
  88. b
  89. a
  90. [4, 6]
  91. a
  92. a
  93. b
  94. a
  95. a
  96. a
  97. a
  98. Ratio: 1.34062927496580027359781121751025991792065663475
  99. Ratio: 1.46043165467625899280575539568345323741007194245