#!/usr/bin/ruby

# fdf.sf
# usage: sidef fdf.sf [size] [dir1] [...]
  3. require('File::Find')
  # Find groups of files with identical content below the given directories.
  #
  # Parameters:
  #   code     - block invoked once per group of duplicates; it receives the
  #              reference file of the group followed by every file that
  #              compared equal to it.
  #   size_min - files whose File.size is below this value are ignored
  #              (default: 0).
  #   dirs     - directories handed to File::Find's find() for traversal.
  #
  # Returns nothing; results are delivered only through `code`.
  func find_duplicate_files(Block code, size_min=0, *dirs) {

      # Bucket candidates by size: only same-sized files are ever compared.
      var files = Hash()

      %S<File::Find>.find(
          Hash(
              no_chdir => true,
              wanted   => func(arg) {
                  var file = File(arg)
                  file.is_file || return()    # keep regular files only
                  file.is_link && return()    # skip symbolic links
                  var size = file.size
                  size >= size_min || return()
                  files{size} := [] << file
              },
          ) => dirs...
      )

      files.values.each { |set|
          set.len > 1 || next    # a lone file of this size has no duplicate

          # Reference file (used as hash key) => files found equal to it.
          var dups = Hash()

          # The range is computed once; when `set` shrinks below `i`, the
          # inner loop simply does not run.
          for i in (^set.end) {
              # Scan from the end towards i. Removing set[j] only shifts
              # entries above j, which were already examined, so j must keep
              # decreasing normally. Bumping j after the removal would re-test
              # an already rejected entry, or index past the end of `set`.
              for (var j = set.end; j > i; --j) {
                  if (set[i].compare(set[j]) == 0) {    # 0 is treated as "same content"
                      dups{set[i]} := [] << set.pop_at(j)
                  }
              }
          }

          # Hash keys are turned back into File objects before the callback.
          dups.each{ |k,v| code(k.to_file, v...) }
      }

      return()
  }
  # Duplicate groups gathered so far, keyed by the size of the group's first file.
  # Each value is a list of groups; each group is the list of files passed to collect.
  var duplicates = Hash()

  # Callback for find_duplicate_files: stores the received group of files
  # under the size of its first member.
  func collect(*files) {
      var size = files[0].size
      duplicates{size} := []        # create the bucket on first use
      duplicates{size} << files
  }
  # First command-line argument is the minimum size; the rest are directories.
  var min_size = Num(ARGV.shift)
  find_duplicate_files(collect, min_size, ARGV...)

  # Report the collected groups, sorted by descending size key.
  var by_size_desc = duplicates.sort_by { |k| -k.to_i }

  for k,groups in (by_size_desc) {
      say "=> Size: #{k}\n#{'~'*80}"
      groups.each { |files|
          say "#{files.sort.join(%Q[\n])}\n#{'-'*80}"
      }
  }