123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- #!/usr/bin/perl
- # Author: Trizen
- # Date: 09 May 2024
- # Edit: 08 July 2024
- # https://github.com/trizen
- # A simple LZ4 decompressor.
- # References:
- # https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
- # https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
- use 5.036;
# Turn on autoflush for the currently selected output handle (STDOUT unless
# something has called select), so decoded data is written as it is printed.
local $| = 1;

# LZ4 frames are binary: put both standard streams in raw mode so no
# newline or encoding layer touches the bytes.
binmode($_, ":raw") for (\*STDIN, \*STDOUT);
# Read $n bytes from $fh and decode them as an unsigned little-endian
# integer (least-significant byte first).
#
#   $fh - filehandle to read from
#   $n  - number of bytes to consume
#
# Returns the decoded integer; 0 when $n is 0.
# Dies with "Unexpected end of input" when fewer than $n bytes can be read,
# instead of decoding a truncated value.
sub bytes2int_lsb ($fh, $n) {

    my $bytes = '';
    my $got   = read($fh, $bytes, $n);

    if (!defined($got) or $got != $n) {
        die "Unexpected end of input: wanted $n byte(s), got " . ($got // 0) . "\n";
    }

    # Fold the bytes most-significant first, so the byte that was read last
    # ends up in the highest position.
    my $value = 0;
    $value = ($value << 8) | $_ for reverse unpack('C*', $bytes);

    return $value;
}
# Built-in demo input: two concatenated LZ4 frames that decode to short
# "Hello, World!" messages. Only used when nothing is piped into STDIN.
my $s = '';

$s .= "\4\"M\30d@\xA7\16\0\0\x80Hello, World!\n\0\0\0\0\xE8C\xD0\x9E";                # uncompressed
$s .= "\4\"M\30d@\xA7\27\0\0\0\xE5Hello, World! \16\0Prld!\n\0\0\0\0\x9FL\"T";        # compressed

# Select the input handle: the demo frames when STDIN is a terminal,
# otherwise whatever is piped or redirected into the script.
my $fh;

if (-t STDIN) {
    open($fh, "<:raw", \$s)
      or die "Can't open the in-memory demo input: $!\n";
}
else {
    $fh = \*STDIN;
}
# Decode every frame found on $fh and print the decompressed bytes to STDOUT.
# Header fields, block sizes and checksums are reported on STDERR; the
# checksums are read and reported but not verified.
# Dies on a bad magic number, an unsupported version, or a truncated or
# corrupted block.
FRAME: while (!eof($fh)) {

    my $magic = bytes2int_lsb($fh, 4);

    # Skippable frames (magic 0x184D2A50 .. 0x184D2A5F) hold user data:
    # a 4-byte little-endian size followed by that many bytes to step over.
    if (($magic & 0xFFFFFFF0) == 0x184D2A50) {
        my $skip = bytes2int_lsb($fh, 4);
        say STDERR "Skippable frame: $skip bytes";

        # Discard in bounded chunks so a large frame is never held in memory.
        while ($skip > 0) {
            my $want = $skip < 65536 ? $skip : 65536;
            my $got  = read($fh, my $discard, $want);
            $got or die "Truncated skippable frame\n";
            $skip -= $got;
        }

        next FRAME;
    }

    $magic == 0x184D2204 or die "Not an LZ4 file\n";

    # Frame descriptor: FLG byte, BD byte, optional fields, header checksum.
    my $FLG = ord(getc($fh) // die "Truncated LZ4 frame header\n");
    my $BD  = ord(getc($fh) // die "Truncated LZ4 frame header\n");

    my $version    = $FLG & 0b11_00_00_00;
    my $B_indep    = $FLG & 0b00_10_00_00;
    my $B_checksum = $FLG & 0b00_01_00_00;
    my $C_size     = $FLG & 0b00_00_10_00;
    my $C_checksum = $FLG & 0b00_00_01_00;
    my $DictID     = $FLG & 0b00_00_00_01;

    # BD bits 6-4 hold a size code, not a byte count; translate it so the
    # diagnostic shows the real limit in bytes.
    my $size_code    = ($BD >> 4) & 0b111;
    my %max_size_for = (4 => 64 << 10, 5 => 256 << 10, 6 => 1 << 20, 7 => 4 << 20);
    my $Block_MaxSize = $max_size_for{$size_code} // "invalid (code $size_code)";

    say STDERR "Maximum block size: $Block_MaxSize";

    if ($version != 0b01_00_00_00) {
        die "Error: Invalid version number";
    }

    if ($C_size) {
        my $content_size = bytes2int_lsb($fh, 8);
        say STDERR "Content size: ", $content_size;
    }

    if ($DictID) {
        my $dict_id = bytes2int_lsb($fh, 4);
        say STDERR "Dictionary ID: ", $dict_id;
    }

    # The header checksum byte is consumed but not verified.
    defined(getc($fh)) or die "Truncated LZ4 frame header\n";

    # Decoded data not yet printed; it is also the window that match
    # offsets refer back into.
    my $decoded = '';

    BLOCK: while (!eof($fh)) {

        my $block_size = bytes2int_lsb($fh, 4);

        if ($block_size == 0x00000000) {    # signifies an EndMark
            say STDERR "Block size == 0";
            last BLOCK;
        }

        say STDERR "Block size: $block_size";

        if ($block_size >> 31) {            # highest bit set: block is stored uncompressed
            say STDERR "Highest bit set: ", $block_size;
            $block_size &= ((1 << 31) - 1);
            say STDERR "Block size: ", $block_size;

            my $uncompressed = '';
            (read($fh, $uncompressed, $block_size) // -1) == $block_size
              or die "Truncated block: expected $block_size bytes\n";

            $decoded .= $uncompressed;
        }
        else {
            my $compressed = '';
            (read($fh, $compressed, $block_size) // -1) == $block_size
              or die "Truncated block: expected $block_size bytes\n";

            # A block is a series of sequences: token, optional extra
            # literal-length bytes, literals, 2-byte offset, optional extra
            # match-length bytes. Bytes are consumed from the front of
            # $compressed as they are parsed.
            SEQUENCE: while ($compressed ne '') {

                my $token           = ord(substr($compressed, 0, 1, ''));
                my $literals_length = $token >> 4;
                my $match_len       = $token & 0b1111;

                # A nibble of 15 means more length bytes follow; each adds
                # its value, and a byte other than 255 ends the run.
                if ($literals_length == 15) {
                    while (1) {
                        my $byte_len = ord(substr($compressed, 0, 1, ''));
                        $literals_length += $byte_len;
                        last if $byte_len != 255;
                    }
                }

                $literals_length <= length($compressed)
                  or die "Corrupted block: literal run of $literals_length bytes overruns the block\n";

                $decoded .= substr($compressed, 0, $literals_length, '');

                # The final sequence of a block has literals only.
                last SEQUENCE if $compressed eq '';

                length($compressed) >= 2
                  or die "Corrupted block: truncated match offset\n";

                # 16-bit little-endian distance back into the decoded data.
                my $offset = unpack('v', substr($compressed, 0, 2, ''));

                if ($offset == 0) {
                    die "Corrupted block";
                }

                if ($match_len == 15) {
                    while (1) {
                        my $byte_len = ord(substr($compressed, 0, 1, ''));
                        $match_len += $byte_len;
                        last if $byte_len != 255;
                    }
                }

                $match_len += 4;    # stored value is the match length minus 4

                # An offset pointing before the start of the available data
                # would make substr() wrap around and copy the wrong bytes.
                my $start = length($decoded) - $offset;
                $start >= 0
                  or die "Corrupted block: match offset $offset reaches before the start of the decoded data\n";

                if ($offset >= $match_len) {    # non-overlapping match
                    $decoded .= substr($decoded, $start, $match_len);
                }
                else {                          # overlapping match
                    # The copy re-reads bytes it has just written, so the
                    # result is the last $offset bytes repeated and cut to
                    # $match_len.
                    my $period = substr($decoded, $start);
                    $decoded .= substr($period x (int($match_len / $offset) + 1), 0, $match_len);
                }
            }
        }

        if ($B_checksum) {
            my $block_checksum = bytes2int_lsb($fh, 4);
            say STDERR "Block checksum: $block_checksum";
        }

        if ($B_indep) {    # blocks are independent of each other
            print $decoded;
            $decoded = '';
        }
        elsif (length($decoded) > 2**16) {    # blocks are dependent
            # Keep the last 64 KiB as the window for later blocks and flush
            # everything before it.
            print substr($decoded, 0, -(2**16), '');
        }
    }

    if ($C_checksum) {
        my $content_checksum = bytes2int_lsb($fh, 4);
        say STDERR "Content checksum: $content_checksum";
    }

    print $decoded;
}
|