test_compressors.pl 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. #!/usr/bin/perl
  2. # Author: Trizen
  3. # Date: 19 March 2024
  4. # https://github.com/trizen
  5. use 5.036;
  6. use File::Temp qw(tempdir tempfile);
  7. use File::Compare qw(compare);
  8. use File::Basename qw(basename);
  9. use File::Spec::Functions qw(catfile);
  10. use List::Util qw(min);
  11. use Time::HiRes qw(gettimeofday tv_interval);
  12. my %ignored_methods = (
  13. 'tac_file_compression.pl' => 1, # slow
  14. 'tacc_file_compression.pl' => 1, # slow
  15. 'rans_file_compression.pl' => 1, # slow
  16. 'tzip_file_compression.pl' => 1, # poor compression / slow
  17. 'tzip2_file_compression.pl' => 1, # poor compression / slow
  18. 'lzt_file_compression.pl' => 1, # poor compression
  19. 'lzhc_file_compression.pl' => 1, # very poor compression
  20. 'lzt2_file_compression.pl' => 1, # slow
  21. 'bbwr_file_compression.pl' => 1, # slow
  22. 'ppmh_file_compression.pl' => 1, # slow
  23. );
  24. my $input_file = shift(@ARGV) // die "usage: perl $0 [input file] [regex]\n";
  25. my $regex = shift(@ARGV) // '';
  26. if (not -f $input_file) {
  27. die "Error for input file <<$input_file>>: $!\n";
  28. }
  29. my $compressed_dir = tempdir(CLEANUP => 1);
  30. my $decompressed_dir = tempdir(CLEANUP => 1);
  31. my @stats = ({format => 'orig', filename => basename($input_file), compression_time => 0, decompression_time => 0, size => -s $input_file});
  32. sub commify ($n) {
  33. scalar reverse(reverse($n) =~ s/(\d{3})(?=\d)/$1,/gr);
  34. }
  35. foreach my $file (glob("*_file_compression.pl")) {
  36. next if $ignored_methods{$file};
  37. $file =~ /$regex/o or next;
  38. say "\n:: Testing: $file";
  39. my ($format) = $file =~ /^([^_]+)/;
  40. my $basename = basename($input_file) . '.' . $format;
  41. my $compressed_file = catfile($compressed_dir, $basename);
  42. my $compression_t0 = [gettimeofday];
  43. system($^X, $file, '-i', $input_file, '-o', $compressed_file);
  44. my $compression_dt = tv_interval($compression_t0);
  45. $? == 0 or die "compression error for: $file";
  46. my (undef, $decompressed_file) = tempfile(DIR => $decompressed_dir);
  47. my $decompression_t0 = [gettimeofday];
  48. system($^X, $file, '-r', '-e', '-i', $compressed_file, '-o', $decompressed_file);
  49. my $decompression_dt = tv_interval($decompression_t0);
  50. $? == 0 or die "decompression error for: $file";
  51. if (compare($decompressed_file, $input_file) != 0) {
  52. die "Decompressed file does not match the input file for: $file";
  53. }
  54. push @stats,
  55. {
  56. format => $format,
  57. filename => $basename,
  58. compression_time => $compression_dt,
  59. decompression_time => $decompression_dt,
  60. size => -s $compressed_file,
  61. };
  62. }
  63. say '';
  64. printf("%8s %6s %6s %6s %s\n", "SIZE", "RATIO", "COMPRE", "DECOMP", "FILENAME");
  65. foreach my $entry (sort { $a->{size} <=> $b->{size} } @stats) {
  66. printf("%8s %6.3f %6.3f %6.3f %s\n",
  67. commify($entry->{size}),
  68. (-s $input_file) / $entry->{size},
  69. $entry->{compression_time},
  70. $entry->{decompression_time},
  71. $entry->{filename});
  72. }
  73. say '';
  74. my $top = min(20, scalar(@stats) - 1);
  75. say "Top $top fastest compression methods: ",
  76. join(', ', map { $_->{format} } (sort { $a->{compression_time} <=> $b->{compression_time} } grep { $_->{compression_time} > 0 } @stats)[0 .. $top - 1]);
  77. say "Top $top fastest decompression methods: ",
  78. join(', ', map { $_->{format} } (sort { $a->{decompression_time} <=> $b->{decompression_time} } grep { $_->{decompression_time} > 0 } @stats)[0 .. $top - 1]);
  79. say '';
  80. say "Top $top slowest compression methods: ",
  81. join(', ', map { $_->{format} } (sort { $b->{compression_time} <=> $a->{compression_time} } grep { $_->{compression_time} > 0 } @stats)[0 .. $top - 1]);
  82. say "Top $top slowest decompression methods: ",
  83. join(', ', map { $_->{format} } (sort { $b->{decompression_time} <=> $a->{decompression_time} } grep { $_->{decompression_time} > 0 } @stats)[0 .. $top - 1]);
  84. __END__
  85. SIZE RATIO COMPRE DECOMP FILENAME
  86. 2,356 6.088 0.148 0.144 perl.bwad
  87. 2,359 6.081 0.187 0.192 perl.bwlzad2
  88. 2,379 6.029 0.210 0.193 perl.bwlzad
  89. 2,413 5.944 0.053 0.037 perl.bwac
  90. 2,414 5.942 0.056 0.051 perl.bwaz
  91. 2,418 5.932 0.083 0.067 perl.bwlza2
  92. 2,426 5.913 0.090 0.065 perl.bwlza
  93. 2,426 5.913 0.076 0.050 perl.bwt
  94. 2,443 5.871 0.079 0.061 perl.bwlz
  95. 2,591 5.536 0.136 0.043 perl.bwrm
  96. 2,626 5.462 0.134 0.046 perl.bwrl2
  97. 2,653 5.407 0.153 0.073 perl.bwrlz
  98. 2,695 5.322 0.179 0.180 perl.lzsad
  99. 2,751 5.214 0.141 0.052 perl.bwrla
  100. 2,760 5.197 0.135 0.049 perl.bwrl
  101. 2,819 5.088 0.079 0.069 perl.lzsa
  102. 2,831 5.067 0.077 0.041 perl.bwt2
  103. 2,835 5.060 0.104 0.065 perl.bwlz2
  104. 2,836 5.058 0.057 0.042 perl.lzss
  105. 2,865 5.007 0.086 0.048 perl.lzsbw
  106. 2,868 5.001 0.043 0.041 perl.lzaz
  107. 2,870 4.998 0.042 0.035 perl.lzac
  108. 2,877 4.986 0.070 0.059 perl.bwlzss
  109. 2,878 4.984 0.037 0.030 perl.lzhd
  110. 2,905 4.938 0.169 0.077 perl.bwrlz2
  111. 2,980 4.813 0.057 0.028 perl.bww
  112. 3,003 4.777 0.051 0.042 perl.mra
  113. 3,005 4.773 0.055 0.046 perl.bwlzhd
  114. 3,014 4.759 0.135 0.126 perl.lzbwad
  115. 3,025 4.742 0.065 0.046 perl.mrh
  116. 3,027 4.739 0.028 0.023 perl.lzw
  117. 3,028 4.737 0.075 0.040 perl.lzbwd
  118. 3,030 4.734 0.069 0.050 perl.mrlz
  119. 3,072 4.669 0.063 0.037 perl.lzbwh
  120. 3,146 4.559 0.075 0.042 perl.mbwr
  121. 3,176 4.516 0.062 0.040 perl.lzbwa
  122. 3,186 4.502 0.057 0.036 perl.lzbw
  123. 3,214 4.463 0.036 0.031 perl.lzih
  124. 3,230 4.441 0.022 0.029 perl.rlh
  125. 3,321 4.319 0.053 0.042 perl.lza
  126. 3,335 4.301 0.047 0.035 perl.lzh
  127. 3,504 4.094 0.032 0.037 perl.rlac
  128. 4,052 3.540 0.030 0.034 perl.hfm
  129. 4,193 3.421 0.038 0.020 perl.lz77
  130. 14,344 1.000 0.000 0.000 perl
  131. Top 20 fastest compression methods: rlh, lzw, hfm, rlac, lzih, lzhd, lz77, lzac, lzaz, lzh, mra, lza, bwac, bwlzhd, bwaz, lzss, lzbw, bww, lzbwa, lzbwh
  132. Top 20 fastest decompression methods: lz77, lzw, bww, rlh, lzhd, lzih, hfm, lzh, lzac, lzbw, lzbwh, bwac, rlac, lzbwa, lzbwd, bwt2, lzaz, lza, mbwr, mra
  133. Top 20 slowest compression methods: bwlzad, bwlzad2, lzsad, bwrlz2, bwrlz, bwad, bwrla, bwrm, bwrl, lzbwad, bwrl2, bwlz2, bwlza, lzsbw, bwlza2, lzsa, bwlz, bwt2, bwt, mbwr
  134. Top 20 slowest decompression methods: bwlzad, bwlzad2, lzsad, bwad, lzbwad, bwrlz2, bwrlz, lzsa, bwlza2, bwlza, bwlz2, bwlz, bwlzss, bwrla, bwaz, bwt, mrlz, bwrl, lzsbw, bwrl2