123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- #!/usr/bin/perl
- # Daniel "Trizen" Șuteu
- # License: GPLv3
- # Date: 01 January 2012
- # Edit: 24 August 2024
- # https://github.com/trizen
- # Find and list duplicate files from one or more paths, with options for
- # deleting or replacing duplicate files with symbolic links to the main file.
- use 5.005;
- use strict;
- use warnings;
- use File::Find qw(find);
- use File::Compare qw(compare);
- use File::Basename qw(basename);
- use Getopt::Long qw(GetOptions);
# Ordering strategies selectable with --order. Each callback receives a list
# of file paths and returns the same paths in the requested order.
my %order_callbacks = (

    # Plain string sort of the full paths.
    path => sub {
        my @paths = @_;
        return sort @paths;
    },

    # Sort by the last path component (file name), compared as strings.
    name => sub {
        my @paths = @_;

        my %name_of;
        foreach my $path (@paths) {
            $name_of{$path} = basename($path);
        }

        return sort { $name_of{$a} cmp $name_of{$b} } @paths;
    },

    # Sort by the value of -M (script start time minus modification time,
    # in days), ascending, so the most recently modified file comes first.
    time => sub {
        my @paths = @_;

        my %age_of;
        foreach my $path (@paths) {
            $age_of{$path} = -M $path;
        }

        return sort { $age_of{$a} <=> $age_of{$b} } @paths;
    },
);
# Command-line state; the values below are the defaults used when the
# corresponding option is not given.
my $keep_first;
my $keep_last;
my $create_symlinks;
my $show_help;
my $order_by = 'time';
my $min_size = 0;

# Parse the options first. GetOptions() removes every option (and its value)
# from @ARGV, so what remains afterwards are only the path arguments. Filtering
# @ARGV before this step would treat an option value such as the `time` in
# `--order time` as a path to scan whenever a file or directory of that name
# happens to exist.
GetOptions(
    'f|first!'           => \$keep_first,
    'l|last!'            => \$keep_last,
    's|symlink!'         => \$create_symlinks,
    'o|order|order-by=s' => \$order_by,
    'm|min-size=i'       => \$min_size,
    'h|help'             => \$show_help,
  )
  or die("$0: error in command line arguments\n");

# Keep only the arguments that name an existing directory or file.
my @dirs = grep { (-d) or (-f) } @ARGV;

# Show the usage text when asked for it or when there is nothing to scan.
die <<"HELP" if $show_help || !@dirs;
usage: $0 [options] /my/path [...]
Options:
-f, --first : keep only the first duplicated file
-l, --last : keep only the last duplicated file
-s, --symlink : replace duplicate files with symbolic links (with -f or -l)
-o, --order=type : order the results by: path, name or time
-m, --min-size=i : minimum size in bytes (default: 0)
HELP

# Reject ordering names that have no entry in the dispatch table.
if (not exists $order_callbacks{$order_by}) {
    local $" = ", ";
    die "$0: invalid value `$order_by` for `--order`: valid values are: @{[sort keys %order_callbacks]}\n";
}
# find_duplicated_files { BLOCK } @paths
#
# Walks @paths with File::Find, collects every regular, non-symlink file whose
# size is at least $min_size, and groups files with byte-identical content.
# For each group of two or more identical files, BLOCK is called once with the
# group's paths ordered by the callback selected through $order_by.
#
# The (&@) prototype is what allows the `find_duplicated_files {...} @paths`
# call syntax. Returns nothing.
sub find_duplicated_files (&@) {
    my $callback = shift;

    # Loaded here so the block does not depend on the file's import list.
    require Cwd;

    # Bucket candidate files by size: only equally sized files can be equal.
    my %files_by_size;
    find {
        no_chdir => 1,
        wanted   => sub {
            lstat;
            (-f _) && (not -l _) && ((-s _) >= $min_size) && push @{$files_by_size{-s _}}, $_;
        }
    } => @_;

    foreach my $same_size (values %files_by_size) {
        next if @{$same_size} < 2;

        # The same file can be reached more than once (repeated or overlapping
        # path arguments, `dir` vs `./dir`). Such an entry must never count as
        # a duplicate of itself, otherwise the caller may delete the only copy.
        # Hard links living at different paths are still kept as candidates.
        my %seen;
        my @pending;
        foreach my $path (@{$same_size}) {
            my $resolved = eval { Cwd::abs_path($path) };
            $resolved = $path if !defined($resolved) || !length($resolved);
            push @pending, $path if !$seen{$resolved}++;
        }

        # Repeatedly take one file and split the rest into "identical to it"
        # and "different"; the different ones are examined in the next round.
        while (@pending > 1) {
            my $leader = shift @pending;

            my (@copies, @others);
            foreach my $candidate (@pending) {

                # compare() returns 0 for equal content, 1 for different
                # content and -1 on error; only 0 counts as a duplicate.
                if (compare($leader, $candidate) == 0) {
                    push @copies, $candidate;
                }
                else {
                    push @others, $candidate;
                }
            }
            @pending = @others;

            next if !@copies;
            $callback->($order_callbacks{$order_by}->($leader, @copies));
        }
    }

    return;
}
# Main program: print every group of duplicated files and, when --first or
# --last was given, delete all but the kept file (optionally replacing each
# deleted file with a symbolic link to the kept one).
{
    # Loaded here so the block does not depend on the file's import list.
    require File::Spec;

    # Print list elements one per line and terminate every print with a newline.
    local $, = "\n";
    local $\ = "\n";

    find_duplicated_files {
        my (@files) = @_;

        print @files, "-" x 80;

        # Without --first/--last the group is only listed, nothing is changed.
        my $main_file = (
              $keep_first ? shift(@files)
            : $keep_last  ? pop(@files)
            :               return
        );

        # A relative symlink target is resolved against the directory that
        # contains the link, not against the current working directory, so a
        # relative $main_file would produce dangling links for duplicates that
        # live in other directories. Always link to the absolute path.
        my $link_target = $create_symlinks ? File::Spec->rel2abs($main_file) : undef;

        foreach my $file (@files) {
            print ":: Removing: `$file`";
            unlink($file) or do {
                warn "error: can't delete file `$file': $!\n";
                next;
            };

            if ($create_symlinks) {
                print ":: Symlinking: `$main_file` <- `$file`";
                symlink($link_target, $file) or do {
                    warn "error: can't create symbolic link for `$file': $!\n";
                    next;
                };
            }
        }
    } @dirs;
}
|