12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152 |
- #!/usr/bin/ruby
- # usage: sidef fdf.sf [size] [dir1] [...]
- require('File::Find')
- func find_duplicate_files(Block code, size_min=0, *dirs) {
- var files = Hash()
- %S<File::Find>.find(
- Hash(
- no_chdir => true,
- wanted => func(arg) {
- var file = File(arg)
- file.is_file || return()
- file.is_link && return()
- var size = file.size
- size >= size_min || return()
- files{size} := [] << file
- },
- ) => dirs...
- )
- files.values.each { |set|
- set.len > 1 || next
- var dups = Hash()
- for i in (^set.end) {
- for (var j = set.end; j > i; --j) {
- if (set[i].compare(set[j]) == 0) {
- dups{set[i]} := [] << set.pop_at(j++)
- }
- }
- }
- dups.each{ |k,v| code(k.to_file, v...) }
- }
- return()
- }
- var duplicates = Hash()
- func collect(*files) {
- duplicates{files[0].size} := [] << files
- }
- find_duplicate_files(collect, Num(ARGV.shift), ARGV...)
- for k,v in (duplicates.sort_by { |k| -k.to_i }) {
- say "=> Size: #{k}\n#{'~'*80}"
- for files in v {
- say "#{files.sort.join(%Q[\n])}\n#{'-'*80}"
- }
- }
|