123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- #!/usr/bin/perl
- # Daniel "Trizen" Șuteu
- # License: GPLv3
- # Date: 27 December 2011
- # Edit: 18 February 2012
- # Edit: 16 November 2021
- # https://github.com/trizen
- # Find files which have the exact or almost the exact name in a path.
- use 5.010;
- use strict;
- use warnings;
- use Getopt::Long;
- use experimental qw(smartmatch);
- sub usage {
- print <<"HELP";
- usage: $0 [options] [dir]
- options:
- --approx=i : amount of approximateness (default: 0)
- --hidden! : verify hidden files and folders (default: false)
- example: $0 --approx=4 /my/dir
- HELP
- exit 0;
- }
- my $show_hidden_files;
- my $approximate_n;
- GetOptions(
- 'approximate=i' => \$approximate_n,
- 'hidden!' => \$show_hidden_files,
- 'help|h' => \&usage,
- )
- or die "Error in command-line arguments!";
- if (defined $approximate_n) {
- $approximate_n += 1;
- }
- my @files;
- sub locate_files {
- foreach my $dir (@{$_[0]}) {
- $dir = readlink $dir and chop $dir if -l $dir;
- next unless opendir(my $dir_h, $dir);
- my @dirs;
- while (defined(my $file = readdir $dir_h)) {
- if ($show_hidden_files) {
- if ($file eq '.' || $file eq '..') {
- next;
- }
- }
- else {
- next if chr ord $file eq '.';
- }
- if (-d "$dir/$file") {
- push @dirs, "$dir/$file";
- }
- elsif (-f _) {
- push @files, {lc $file, "$dir/$file", 'file', lc $file};
- }
- }
- closedir $dir_h;
- locate_files(\@dirs);
- }
- }
- sub editdist {
- my %h;
- $h{$_}++ for split //, lc shift;
- $h{$_}-- for split //, lc shift;
- my $t = 0;
- $t += ($_ > 0 ? $_ : -$_) for values %h;
- $t;
- }
- sub find_similar_names {
- my ($name, $array_ref) = @_;
- my (@names) =
- sort { $a->[1] <=> $b->[1] } grep { defined } map {
- my $d = editdist($_, $name);
- $d < $approximate_n ? [$_, $d] : undef;
- } grep { $_ ne $name } @$array_ref;
- if (@names) {
- my $best = $names[0][1];
- @names = map { $_->[0] } grep { $_->[1] == $best } @names;
- }
- \@names;
- }
- sub diff {
- my %alike;
- my %table;
- my @found;
- if (defined $approximate_n) {
- my (@names) = map { $_->{'file'} } @files;
- foreach my $file (@files) {
- my (@names) =
- map { $_->{'file'} }
- grep {
- my $length_1 = length $_->{'file'};
- my $length_2 = length $file->{'file'};
- ($length_1 <= $length_2 + $approximate_n) and ($length_1 >= $length_2 - $approximate_n)
- or ($length_1 == $length_2)
- if ($_->{'file'} ne $file->{'file'});
- } @files;
- push @{$table{$file->{$file->{'file'}}}}, @{find_similar_names $file->{'file'}, \@names};
- }
- foreach my $array_1_ref (values %table) {
- next unless $array_1_ref;
- while (my ($file, $array_2_ref) = each %table) {
- if (@{$array_2_ref} and $array_1_ref ~~ $array_2_ref) {
- $alike{$file} = ();
- }
- }
- }
- return map { $_->[1] }
- sort { $a->[0] cmp $b->[0] }
- map { [lc(substr($_, rindex($_, '/'))), $_] }
- keys %alike;
- }
- foreach my $file (@files, @files) {
- $alike{$file->{$file->{'file'}}} = () if $table{$file->{'file'}}++ >= 2;
- }
- return map { $_->[1] }
- sort { $a->[0] cmp $b->[0] }
- map { [lc(substr($_, rindex($_, '/'))), $_] }
- grep { length } keys %alike;
- }
- foreach my $arg (@ARGV) {
- $arg =~ s[(?<=.)/+$][];
- my (@dir) = (-d $arg) ? $arg : next;
- local $, = "\n";
- say diff(locate_files(\@dir));
- undef @files;
- }
|