lz4_decompressor.pl 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #!/usr/bin/perl
  2. # Author: Trizen
  3. # Date: 09 May 2024
  4. # Edit: 08 July 2024
  5. # https://github.com/trizen
  6. # A simple LZ4 decompressor.
  7. # References:
  8. # https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
  9. # https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
  10. use 5.036;
  11. local $| = 1;
  12. binmode(STDIN, ":raw");
  13. binmode(STDOUT, ":raw");
  14. sub bytes2int_lsb ($fh, $n) {
  15. my $bytes = '';
  16. $bytes .= getc($fh) for (1 .. $n);
  17. oct('0b' . reverse unpack('b*', $bytes));
  18. }
  19. my $s = '';
  20. $s .= "\4\"M\30d@\xA7\16\0\0\x80Hello, World!\n\0\0\0\0\xE8C\xD0\x9E"; # uncompressed
  21. $s .= "\4\"M\30d@\xA7\27\0\0\0\xE5Hello, World! \16\0Prld!\n\0\0\0\0\x9FL\"T"; # compressed
  22. my $fh;
  23. if (-t STDIN) {
  24. open $fh, "<:raw", \$s;
  25. }
  26. else {
  27. $fh = \*STDIN;
  28. }
  29. while (!eof($fh)) {
  30. bytes2int_lsb($fh, 4) == 0x184D2204 or die "Not an LZ4 file\n";
  31. my $FLG = ord(getc($fh));
  32. my $BD = ord(getc($fh));
  33. my $version = $FLG & 0b11_00_00_00;
  34. my $B_indep = $FLG & 0b00_10_00_00;
  35. my $B_checksum = $FLG & 0b00_01_00_00;
  36. my $C_size = $FLG & 0b00_00_10_00;
  37. my $C_checksum = $FLG & 0b00_00_01_00;
  38. my $DictID = $FLG & 0b00_00_00_01;
  39. my $Block_MaxSize = $BD & 0b0_111_0000;
  40. say STDERR "Maximum block size: $Block_MaxSize";
  41. if ($version != 0b01_00_00_00) {
  42. die "Error: Invalid version number";
  43. }
  44. if ($C_size) {
  45. my $content_size = bytes2int_lsb($fh, 8);
  46. say STDERR "Content size: ", $content_size;
  47. }
  48. if ($DictID) {
  49. my $dict_id = bytes2int_lsb($fh, 4);
  50. say STDERR "Dictionary ID: ", $dict_id;
  51. }
  52. my $header_checksum = ord(getc($fh));
  53. my $decoded = '';
  54. while (!eof($fh)) {
  55. my $block_size = bytes2int_lsb($fh, 4);
  56. if ($block_size == 0x00000000) { # signifies an EndMark
  57. say STDERR "Block size == 0";
  58. last;
  59. }
  60. say STDERR "Block size: $block_size";
  61. if ($block_size >> 31) {
  62. say STDERR "Highest bit set: ", $block_size;
  63. $block_size &= ((1 << 31) - 1);
  64. say STDERR "Block size: ", $block_size;
  65. my $uncompressed = '';
  66. read($fh, $uncompressed, $block_size);
  67. $decoded .= $uncompressed;
  68. }
  69. else {
  70. my $compressed = '';
  71. read($fh, $compressed, $block_size);
  72. while ($compressed ne '') {
  73. my $len_byte = ord(substr($compressed, 0, 1, ''));
  74. my $literals_length = $len_byte >> 4;
  75. my $match_len = $len_byte & 0b1111;
  76. #say STDERR "Literal: ", $literals_length;
  77. #say STDERR "Match len: ", $match_len;
  78. if ($literals_length == 15) {
  79. while (1) {
  80. my $byte_len = ord(substr($compressed, 0, 1, ''));
  81. $literals_length += $byte_len;
  82. last if $byte_len != 255;
  83. }
  84. }
  85. #say STDERR "Total literals length: ", $literals_length;
  86. my $literals = '';
  87. if ($literals_length > 0) {
  88. $literals = substr($compressed, 0, $literals_length, '');
  89. }
  90. if ($compressed eq '') { # end of block
  91. $decoded .= $literals;
  92. last;
  93. }
  94. my $offset = oct('0b' . reverse unpack('b16', substr($compressed, 0, 2, '')));
  95. if ($offset == 0) {
  96. die "Corrupted block";
  97. }
  98. # say STDERR "Offset: $offset";
  99. if ($match_len == 15) {
  100. while (1) {
  101. my $byte_len = ord(substr($compressed, 0, 1, ''));
  102. $match_len += $byte_len;
  103. last if $byte_len != 255;
  104. }
  105. }
  106. $decoded .= $literals;
  107. $match_len += 4;
  108. # say STDERR "Total match len: $match_len\n";
  109. if ($offset >= $match_len) { # non-overlapping matches
  110. $decoded .= substr($decoded, length($decoded) - $offset, $match_len);
  111. }
  112. elsif ($offset == 1) {
  113. $decoded .= substr($decoded, -1) x $match_len;
  114. }
  115. else { # overlapping matches
  116. foreach my $i (1 .. $match_len) {
  117. $decoded .= substr($decoded, length($decoded) - $offset, 1);
  118. }
  119. }
  120. }
  121. }
  122. if ($B_checksum) {
  123. my $content_checksum = bytes2int_lsb($fh, 4);
  124. say STDERR "Block checksum: $content_checksum";
  125. }
  126. if ($B_indep) { # blocks are independent of each other
  127. print $decoded;
  128. $decoded = '';
  129. }
  130. elsif (length($decoded) > 2**16) { # blocks are dependent
  131. print substr($decoded, 0, -(2**16), '');
  132. }
  133. }
  134. if ($C_checksum) {
  135. my $content_checksum = bytes2int_lsb($fh, 4);
  136. say STDERR "Content checksum: $content_checksum";
  137. }
  138. print $decoded;
  139. }