123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- #!/usr/bin/perl
- # Daniel "Trizen" Șuteu
- # License: GPLv3
- # Date: 21 June 2012
- # https://github.com/trizen
- # Find and list duplicated files from one or more paths
- #
- ## WARNING! For strict duplicates, use the 'fdf' script:
- # https://github.com/trizen/perl-scripts/blob/master/Finders/fdf
- #
- use 5.005;
- use strict;
- use warnings;
- use File::Find qw(find);
- use File::Basename qw(basename);
- use Getopt::Std qw(getopts);
- my @dirs = grep { -d } @ARGV;
- die <<"HELP" if !@dirs;
- usage: $0 [options] /my/path [...]
- Options:
- -f : keep only the first duplicated file
- -l : keep only the last duplicated file
- HELP
- my %opts;
- if (@ARGV) {
- getopts("fl", \%opts);
- }
- sub compare_strings ($$) {
- my ($name1, $name2) = @_;
- return 0 if $name1 eq $name2;
- if (length($name1) > length($name2)) {
- ($name2, $name1) = ($name1, $name2);
- }
- my $len1 = length($name1);
- my $len2 = length($name2);
- my $min = int(0.5 + $len2 / 2);
- return -1 if $min > $len1;
- my $diff = $len1 - $min;
- foreach my $i (0 .. $diff) {
- foreach my $j ($i .. $diff) {
- if (index($name2, substr($name1, $i, $min + $j - $i)) != -1) {
- return 0;
- }
- }
- }
- return 1;
- }
- sub find_duplicated_files (&@) {
- my $code = shift;
- my %files;
- find {
- no_chdir => 1,
- wanted => sub {
- lstat;
- return if ((-s _) < 4 * 1024); # skips files smaller than 4KB
- -f _ && (not -l _) && push @{$files{-s _}}, $_;
- }
- } => @_;
- foreach my $files (values %files) {
- next if $#{$files} < 1;
- my %dups;
- foreach my $i (0 .. $#{$files} - 1) {
- for (my $j = $i + 1 ; $j <= $#{$files} ; $j++) {
- if (compare_strings(basename($files->[$i]), basename($files->[$j])) == 0) {
- push @{$dups{$files->[$i]}}, splice @{$files}, $j--, 1;
- }
- }
- }
- while (my ($fparent, $fdups) = each %dups) {
- $code->(sort $fparent, @{$fdups});
- }
- }
- return;
- }
- {
- local $, = "\n";
- local $\ = "\n";
- find_duplicated_files {
- print @_, "-" x 80 if @_;
- foreach my $i (
- $opts{f} ? (1 .. $#_)
- : $opts{l} ? (0 .. $#_ - 1)
- : ()
- ) {
- unlink $_[$i] or warn "[error]: Can't delete: $!\n";
- }
- }
- @dirs;
- }
|