123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- #!/usr/bin/ruby
- # Author: Trizen
- # Date: 02 May 2024
- # Translated: 02 June 2024
- # https://github.com/trizen
- # Implementation of LZSS encoding, using an hash table.
- func lzss_encode (str) {
- var la = 0
- var chars = str.chars
- var end = chars.end
- var min_len = 4 # minimum match length
- var max_len = 255 # maximum match length
- var max_dist = ((1 << 16) - 1) # maximum match distance
- var max_chain_len = 16 # how many recent positions to keep track of
- var (*literals, *distances, *lengths, :table)
- while (la <= end) {
- var best_n = 1
- var best_p = la
- var lookahead = str.substr(la, min_len)
- if (table.has(lookahead)) {
- for p in (table{lookahead}) {
- if (la-p > max_dist) {
- break
- }
- var n = min_len
- while ((n <= max_len) && (la+n <= end) && (chars[la + n - 1] == chars[p + n - 1])) {
- ++n
- }
- if (n > best_n) {
- best_p = p
- best_n = n
- }
- }
- var matched = str.substr(la, best_n)
- var i = 0
- matched.each_cons(min_len, {|key|
- table{key} := [] -> unshift(la + i)
- if (table{key}.len > max_chain_len) {
- table{key}.pop
- }
- ++i
- })
- }
- else {
- table{lookahead} = [la]
- }
- if (best_n > min_len) {
- lengths << (best_n - 1)
- distances << (la - best_p)
- literals << nil
- la += (best_n - 1)
- }
- else {
- lengths << best_n.of(0)...
- distances << best_n.of(0)...
- literals << chars[best_p .. (best_p + best_n - 1)]
- la += best_n
- }
- }
- return (literals, distances, lengths)
- }
- func lzss_decode (literals, distances, lengths) {
- var data = []
- var data_len = 0
- for i in (^lengths) {
- if (lengths[i] == 0) {
- data << literals[i]
- data_len += 1
- next
- }
- var length = lengths[i];
- var dist = distances[i];
- for j in (1 .. length) {
- data << data[data_len + j - dist - 1]
- }
- data_len += length
- }
- data.join
- }
- var string = "abbaabbaabaabaaaa"
- var (literals, distances, lengths) = lzss_encode(string)
- var decoded = lzss_decode(literals, distances, lengths)
- assert_eq(string, decoded)
- for i in (^literals) {
- if (lengths[i] == 0) {
- say literals[i]
- }
- else {
- say "[#{distances[i]}, #{lengths[i]}]"
- }
- }
- for file in ([__FILE__, $^PERL]) { # several tests
- var string = File(file).read(:raw)
- var (literals, distances, lengths) = lzss_encode(string)
- var decoded = lzss_decode(literals, distances, lengths)
- say("Ratio: ", literals.len / literals.grep{defined(_)}.len)
- assert_eq(string, decoded)
- }
- __END__
- a
- b
- b
- a
- [4, 6]
- [3, 5]
- a
- a
- Ratio: 1.31735751295336787564766839378238341968911917098
- Ratio: 1.44651830581478822684852835606604450825556353195
|