lookalike_images.pl 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. #!/usr/bin/perl
  2. # Author: Daniel "Trizen" Șuteu
  3. # License: GPLv3
  4. # Date: 26 August 2015
  5. # Edit: 05 June 2021
  6. # https://github.com/trizen
  7. # Find images that look similar, given a main image.
  8. # Blog post:
  9. # https://trizenx.blogspot.com/2015/08/finding-similar-images.html
  10. use 5.020;
  11. use strict;
  12. use warnings;
  13. use experimental qw(bitwise signatures);
  14. use Image::Magick qw();
  15. use List::Util qw(sum);
  16. use File::Find qw(find);
  17. use Getopt::Long qw(GetOptions);
  18. my $width = 32;
  19. my $height = 32;
  20. my $percentage = 60;
  21. my $fuzzy_matching = 0;
  22. my $copy_to = undef;
  23. my $resize_to = $width . 'x' . $height;
  24. my @img_formats = qw(
  25. jpeg
  26. jpg
  27. png
  28. );
  29. sub help ($code = 0) {
  30. local $" = ",";
  31. print <<"EOT";
  32. usage: $0 [options] [main image] [dir]
  33. options:
  34. -p --percentage=i : minimum similarity percentage (default: $percentage)
  35. -r --resize-to=s : resize images to this resolution (default: $resize_to)
  36. -f --fuzzy! : use fuzzy matching (default: $fuzzy_matching)
  37. -c --copy-to=s : copy similar images into this directory
  38. example:
  39. perl $0 -p 75 -r '8x8' main.jpg ~/Pictures
  40. EOT
  41. exit($code);
  42. }
  43. GetOptions(
  44. 'p|percentage=i' => \$percentage,
  45. 'r|resize-to=s' => \$resize_to,
  46. 'f|fuzzy!' => \$fuzzy_matching,
  47. 'c|copy-to=s' => \$copy_to,
  48. 'h|help' => sub { help(0) },
  49. )
  50. or die("Error in command line arguments");
  51. ($width, $height) = split(/\h*x\h*/i, $resize_to);
  52. my $size = $width * $height;
  53. my $img_formats_re = do {
  54. local $" = '|';
  55. qr/\.(@img_formats)\z/i;
  56. };
  57. sub avg ($x, $y, $z) {
  58. ($x + $y + $z) / 3;
  59. }
  60. sub alike_percentage ($x, $y) {
  61. ((($x ^. $y) =~ tr/\0//) / $size)**2 * 100;
  62. }
  63. sub fingerprint ($image) {
  64. my $img = Image::Magick->new;
  65. $img->Read(filename => $image) && return;
  66. $img->AdaptiveResize(width => $width, height => $height) && return;
  67. my @pixels = $img->GetPixels(
  68. map => 'RGB',
  69. x => 0,
  70. y => 0,
  71. width => $width,
  72. height => $height,
  73. normalize => 1,
  74. );
  75. my $i = 0;
  76. my @averages;
  77. while (@pixels) {
  78. my $x = int($i % $width);
  79. my $y = int($i / $width);
  80. push @averages, avg(splice(@pixels, 0, 3));
  81. ++$i;
  82. }
  83. my $avg = sum(@averages) / @averages;
  84. join('', map { $_ < $avg ? 1 : 0 } @averages);
  85. }
  86. sub find_similar_images ($callback, $main_image, @paths) {
  87. my @files;
  88. find {
  89. no_chdir => 1,
  90. wanted => sub {
  91. (/$img_formats_re/o && -f) || return;
  92. push @files,
  93. {
  94. fingerprint => fingerprint($_) // return,
  95. filename => $_,
  96. };
  97. }
  98. } => @paths;
  99. my $main_fingerprint = fingerprint($main_image) // return;
  100. if ($fuzzy_matching) {
  101. my %seen = ($main_fingerprint => 1);
  102. my @similar = ($main_fingerprint);
  103. my @similar_files;
  104. while (@similar) {
  105. my $similar_fingerprint = shift(@similar);
  106. foreach my $file (@files) {
  107. my $p = alike_percentage($similar_fingerprint, $file->{fingerprint});
  108. if ($p >= $percentage and !$seen{$file->{fingerprint}}++) {
  109. push @similar, $file->{fingerprint};
  110. push @similar_files, {score => $p, filename => $file->{filename}};
  111. }
  112. }
  113. }
  114. foreach my $entry (sort { $b->{score} <=> $a->{score} } @similar_files) {
  115. $callback->($entry->{score}, $entry->{filename});
  116. }
  117. }
  118. else {
  119. foreach my $file (@files) {
  120. my $p = alike_percentage($main_fingerprint, $file->{fingerprint});
  121. if ($p >= $percentage) {
  122. $callback->($p, $file->{filename});
  123. }
  124. }
  125. }
  126. return 1;
  127. }
  128. my $main_file = shift(@ARGV) // help(1);
  129. @ARGV || help(1);
  130. if (defined($copy_to)) {
  131. require File::Copy;
  132. if (not -d $copy_to) {
  133. require File::Path;
  134. File::Path::make_path($copy_to)
  135. or die "Can't create path <<$copy_to>>: $!";
  136. }
  137. }
  138. find_similar_images(
  139. sub ($score, $file) {
  140. say sprintf("%.0f%%: %s", $score, $file);
  141. if ($copy_to) {
  142. File::Copy::cp($file, $copy_to);
  143. }
  144. },
  145. $main_file,
  146. @ARGV
  147. );