fdf-filename 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. #!/usr/bin/perl
  2. # Daniel "Trizen" Șuteu
  3. # License: GPLv3
  4. # Date: 21 June 2012
  5. # https://github.com/trizen
  6. # Find and list duplicated files from one or more paths
  7. #
  8. ## WARNING! For strict duplicates, use the 'fdf' script:
  9. # https://github.com/trizen/perl-scripts/blob/master/Finders/fdf
  10. #
  11. use 5.005;
  12. use strict;
  13. use warnings;
  14. use File::Find qw(find);
  15. use File::Basename qw(basename);
  16. use Getopt::Std qw(getopts);
  # Command-line handling.
  #
  # Option switches are parsed (and removed from @ARGV) *before* the directory
  # list is built, so that a switch such as "-f" is never mistaken for a search
  # root when a directory of the same name happens to exist. An unknown switch
  # makes getopts() return false, in which case the usage text is shown instead
  # of silently continuing.
  my $usage =
      "usage: $0 [options] /my/path [...]\n"
    . "Options:\n"
    . "-f : keep only the first duplicated file\n"
    . "-l : keep only the last duplicated file\n";

  my %opts;
  getopts("fl", \%opts) or die $usage;

  # Only existing directories are accepted as search roots.
  my @dirs = grep { -d } @ARGV;
  die $usage if !@dirs;
  # compare_strings($name1, $name2)
  #
  # Fuzzy comparison of two names. Argument order does not matter.
  #
  # Returns:
  #    0  the names are equal, or some run of characters from the shorter
  #       name, at least half as long as the longer name (rounded), occurs
  #       inside the longer name;
  #   -1  the shorter name is shorter than that required run length, so no
  #       comparison is attempted;
  #    1  the lengths are comparable but no such common run exists.
  sub compare_strings ($$) {
      my ($name1, $name2) = @_;

      return 0 if $name1 eq $name2;

      # Make $name1 the shorter of the two names.
      if (length($name1) > length($name2)) {
          ($name1, $name2) = ($name2, $name1);
      }

      my $len1 = length($name1);
      my $min  = int(0.5 + length($name2) / 2);    # required common run length

      return -1 if $min > $len1;

      # Slide a window of exactly $min characters over the shorter name.
      # Longer windows never need testing: if a longer run starting at $i
      # occurred in $name2, its first $min characters would occur there too.
      foreach my $i (0 .. $len1 - $min) {
          return 0 if index($name2, substr($name1, $i, $min)) != -1;
      }

      return 1;
  }
  # find_duplicated_files { CODE } @paths
  #
  # Walks every path in @paths with File::Find and collects the regular files
  # of at least 4KB, grouped by their size in bytes. Inside each size group,
  # files whose base names compare_strings() rates as similar (return value 0)
  # are clustered together. CODE is called once per cluster with the sorted
  # list of the cluster's paths (always two or more). Returns nothing.
  sub find_duplicated_files (&@) {
      my $code = shift;

      my $min_size = 4 * 1024;    # files smaller than 4KB are skipped
      my %seen;                   # paths that were already collected
      my %files;                  # size in bytes => [ paths ]

      find(
          {
              no_chdir => 1,
              wanted   => sub {
                  my $path = $_;    # full path, because of no_chdir

                  # lstat() does not follow symbolic links, so a link is
                  # never seen as a regular file here. Entries that cannot
                  # be stat'ed are ignored.
                  lstat($path) or return;
                  return unless -f _;

                  my $size = -s _;
                  return if $size < $min_size;

                  # Overlapping or repeated search roots (e.g. a directory
                  # and one of its sub-directories) visit the same path
                  # more than once. Without this guard a file would be
                  # reported as a duplicate of itself, and a caller that
                  # deletes duplicates could remove its only copy.
                  return if $seen{$path}++;

                  push @{$files{$size}}, $path;
              },
          },
          @_
      );

      foreach my $group (values %files) {
          my @pending = @{$group};

          # The first pending file becomes the reference; every remaining
          # file with a similar base name joins its cluster, the others
          # stay pending for the next round.
          while (@pending > 1) {
              my $parent      = shift @pending;
              my $parent_name = basename($parent);

              my (@dups, @rest);
              foreach my $candidate (@pending) {
                  if (compare_strings($parent_name, basename($candidate)) == 0) {
                      push @dups, $candidate;
                  }
                  else {
                      push @rest, $candidate;
                  }
              }
              @pending = @rest;

              $code->(sort { $a cmp $b } $parent, @dups) if @dups;
          }
      }

      return;
  }
  # Main driver: print every cluster of similar files followed by a separator
  # line and, when requested, delete all but one file of the cluster.
  {
      # Make print() put each argument on its own line, newline-terminated.
      local $, = "\n";
      local $\ = "\n";

      find_duplicated_files {
          my @group = @_;

          print @group, "-" x 80 if @group;

          # -f keeps the first file of the (sorted) cluster, -l keeps the
          # last one; -f takes precedence when both are given. Without
          # either switch nothing is deleted.
          my @doomed =
              $opts{f} ? @group[1 .. $#group]
            : $opts{l} ? @group[0 .. $#group - 1]
            :            ();

          foreach my $file (@doomed) {
              # Name the file in the warning so the failure is actionable.
              unlink($file) or warn "[error]: Can't delete '$file': $!\n";
          }
      }
      @dirs;
  }