wave-cmp.pl 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. #!/usr/bin/perl
  2. # Author: Daniel "Trizen" Șuteu
  3. # License: GPLv3
  4. # Date: 22 March 2015
  5. # Website: https://github.com/trizen
  6. # Find similar audio files by comparing their waveforms.
  7. # Review:
  8. # https://trizenx.blogspot.com/2015/03/similar-audio-files.html
  9. #
  10. ## The waveform is processed block by block:
  11. # _________________________________________
  12. # |_____|_____|_____|_____|_____|_____|_____|
  13. # |_____|_____|_____|_____|_____|_____|_____|
  14. # |_____|_____|_____|_____|_____|_____|_____|
  15. # |_____|_____|_____|_____|_____|_____|_____|
  16. #
  17. # Each block has a distinct number of white pixels, which are collected
  18. # inside an array and constitute the unique fingerprint of the waveform.
  19. #
  20. # Now, each block value is compared with the corresponding value
  21. # of another fingerprint. If the difference from all blocks is within
  22. # the allowed deviation, then the audio files are marked as similar.
  23. #
  24. # In the end, the similar files are reported to the standard output.
  25. # Requirements:
  26. # - ffmpeg: https://ffmpeg.org/
  27. # - wav2png: https://github.com/beschulz/wav2png
  28. use utf8;
  29. use 5.010;
  30. use strict;
  31. use autodie;
  32. use warnings;
  33. require GD;
  34. GD::Image->trueColor(1);
  35. require GDBM_File;
  36. use List::Util qw(sum);
  37. use Getopt::Long qw(GetOptions);
  38. use File::Find qw(find);
  39. use File::Temp qw(tempdir);
  40. use File::Path qw(make_path);
  41. use File::Spec::Functions qw(catfile catdir);
  42. require Digest::MD5;
  43. my $ctx = Digest::MD5->new;
  44. my $pkgname = 'wave-cmp';
  45. my $version = 0.01;
  46. my $deviation = 5;
  47. my ($width, $height) = (1800, 300);
  48. my ($div_x, $div_y) = (10, 2);
  49. sub help {
  50. my ($code) = @_;
  51. print <<"EOT";
  52. usage: $0 [options] [dirs|files]
  53. => Waveform generation
  54. -w --width=i : width of the waveform (default: $width)
  55. -h --height=i : height of the waveform (default: $height)
  56. => Waveform processing
  57. -x --x-div=i : divisions along the X-axis (default: $div_x)
  58. -y --y-div=i : divisions along the Y-axis (default: $div_y)
  59. -d --deviation=i : tolerance deviation value (default: $deviation)
  60. --help : print this message and exit
  61. --version : print the version number and exit
  62. example:
  63. $0 --deviation=6 ~/Music
  64. EOT
  65. exit($code);
  66. }
  67. sub version {
  68. print "$pkgname $version\n";
  69. exit 0;
  70. }
  71. GetOptions(
  72. 'w|width=i' => \$width,
  73. 'h|height=i' => \$height,
  74. 'x|x-div=i' => \$div_x,
  75. 'y|y-div=i' => \$div_y,
  76. 'd|deviation=i' => \$deviation,
  77. 'help' => sub { help(0) },
  78. 'v|version' => \&version,
  79. )
  80. or die("Error in command line arguments");
  81. my $sq_x = int($width / $div_x);
  82. my $sq_y = int($height / $div_y);
  83. my $limit_x = $width - $sq_x;
  84. my $limit_y = int($height / 2) - $sq_y; # analyze only the first half
  85. # Source: https://en.wikipedia.org/wiki/Audio_file_format#List_of_formats
  86. my @audio_formats = qw(
  87. 3gp
  88. act
  89. aiff
  90. aac
  91. amr
  92. au
  93. awb
  94. dct
  95. dss
  96. flac
  97. gsm
  98. m4a
  99. m4p
  100. mp3
  101. mpc
  102. ogg oga
  103. opus
  104. ra rm
  105. raw
  106. sln
  107. tta
  108. vox
  109. wav
  110. wma
  111. wv
  112. webm
  113. );
  114. my $audio_formats_re = do {
  115. local $" = '|';
  116. qr/\.(?:@audio_formats)\z/i;
  117. };
  118. my $home_dir =
  119. $ENV{HOME}
  120. || $ENV{LOGDIR}
  121. || (getpwuid($<))[7]
  122. || `echo -n ~`;
  123. my $xdg_config_home = catdir($home_dir, '.config');
  124. my $cache_dir = catdir($xdg_config_home, $pkgname);
  125. my $cache_db = catfile($cache_dir, 'fp.db');
  126. if (not -d $cache_dir) {
  127. make_path($cache_dir);
  128. }
  129. my $tmpdir = tempdir(CLEANUP => 1);
  130. tie my %db, 'GDBM_File', $cache_db, &GDBM_File::GDBM_WRCREAT, 0640;
  131. #
  132. #-- execute the ffmpeg and wave2png commands and return the waveform PNG data
  133. #
  134. sub generate_waveform {
  135. my ($file, $output) = @_;
  136. #<<<
  137. # Using sox (currently broken)
  138. # return scalar `sox \Q$file\E -q --norm -V0 --multi-threaded -t wav --encoding signed-integer - | wav2png -w $width -h $height -f ffffffff -b 00000000 -o /dev/stdout /dev/stdin`;
  139. #>>>
  140. my $tmpfile = catfile($tmpdir, $file . '.wav');
  141. system("ffmpeg", "-loglevel", "quiet", "-i", $file, $tmpfile);
  142. $? == 0 or return;
  143. my $waveform = `wav2png -w $width -h $height -f 000000ff -b ffffff00 -o /dev/stdout \Q$tmpfile\E`;
  144. unlink($tmpfile);
  145. return $waveform;
  146. }
  147. #
  148. #-- return the md5 hex digest of the content of a file
  149. #
  150. sub md5_file {
  151. my ($file) = @_;
  152. open my $fh, '<:raw', $file;
  153. $ctx->addfile($fh);
  154. $ctx->hexdigest;
  155. }
  156. #
  157. #-- take image data as input and return a fingerprint array ref
  158. #
  159. sub generate_fingerprint {
  160. my ($image_data) = @_;
  161. $image_data eq '' and return;
  162. state %rgb_cache; # cache the RGB values of pixels
  163. my @fingerprint;
  164. my $image = GD::Image->new($image_data) // return;
  165. for (my $i = 0 ; $i <= $limit_x ; $i += $sq_x) {
  166. for (my $j = 0 ; $j <= $limit_y ; $j += $sq_y) {
  167. my $fill = 0;
  168. foreach my $x ($i .. $i + $sq_x - 1) {
  169. foreach my $y ($j .. $j + $sq_y - 1) {
  170. my $index = $image->getPixel($x, $y);
  171. my $rgb = $rgb_cache{$index} //= [$image->rgb($index)];
  172. $fill++ if $rgb->[0] == 255; # check only the value of red
  173. }
  174. }
  175. push @fingerprint, $fill;
  176. }
  177. }
  178. return \@fingerprint;
  179. }
  180. #
  181. #-- fetch or generate the fingerprint for a given audio file
  182. #
  183. sub fingerprint {
  184. my ($audio_file) = @_;
  185. state $local_cache = {};
  186. return $local_cache->{$audio_file}
  187. if exists $local_cache->{$audio_file};
  188. my $md5 = md5_file($audio_file);
  189. my $key = "$width/$height/$div_x/$div_y/$md5";
  190. if (not exists $db{$key}) {
  191. my $image_data = generate_waveform($audio_file) // return;
  192. my $fingerprint = generate_fingerprint($image_data) // return;
  193. $db{$key} = join(':', @{$fingerprint});
  194. return ($local_cache->{$audio_file} = $fingerprint);
  195. }
  196. $local_cache->{$audio_file} //= [split /:/, $db{$key}];
  197. }
  198. #
  199. #-- compare two fingerprints and return true if they are alike
  200. #
  201. sub alike_fingerprints {
  202. my ($a1, $a2) = @_;
  203. foreach my $i (0 .. $#{$a1}) {
  204. my $value = abs($a1->[$i] - $a2->[$i]) / ($sq_x * $sq_y) * 100;
  205. return if $value > $deviation;
  206. }
  207. return 1;
  208. }
  209. #
  210. #-- compare two audio files and return true if they are alike
  211. #
  212. sub alike_files {
  213. my ($file1, $file2) = @_;
  214. my $fp1 = fingerprint($file1) // return;
  215. my $fp2 = fingerprint($file2) // return;
  216. alike_fingerprints($fp1, $fp2);
  217. }
  218. #
  219. #-- find and call $code with a group of similar audio files
  220. #
  221. sub find_similar_audio_files {
  222. my $code = shift;
  223. my @files;
  224. find {
  225. no_chdir => 1,
  226. wanted => sub {
  227. /$audio_formats_re/ || return;
  228. lstat;
  229. (-f _) && (not -l _) && push @files, $_;
  230. }
  231. } => @_;
  232. my %groups;
  233. my %seen;
  234. my $limit = $#files;
  235. foreach my $i (0 .. $limit) {
  236. foreach my $j ($i + 1 .. $limit) {
  237. next if $seen{$files[$j]};
  238. if (alike_files($files[$i], $files[$j])) {
  239. $groups{$i} //= [$files[$i]];
  240. $seen{$files[$j]}++;
  241. push @{$groups{$i}}, $files[$j];
  242. }
  243. }
  244. if (exists $groups{$i}) {
  245. $code->(delete $groups{$i});
  246. }
  247. }
  248. }
  249. #
  250. #-- print a group of files followed by an horizontal line
  251. #
  252. sub print_group {
  253. my ($group) = @_;
  254. foreach my $file (sort { (lc($a) cmp lc($b)) || ($a cmp $b) } @{$group}) {
  255. say $file;
  256. }
  257. say "-" x 80;
  258. }
  259. @ARGV || help(2);
  260. find_similar_audio_files(\&print_group, @ARGV);