fdf 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. #!/usr/bin/perl
  2. # Daniel "Trizen" Șuteu
  3. # License: GPLv3
  4. # Date: 01 January 2012
  5. # Edit: 24 August 2024
  6. # https://github.com/trizen
  7. # Find and list duplicate files from one or more paths, with options for
  8. # deleting or replacing duplicate files with symbolic links to the main file.
  9. use 5.005;
  10. use strict;
  11. use warnings;
  12. use File::Find qw(find);
  13. use File::Compare qw(compare);
  14. use File::Basename qw(basename);
  15. use Getopt::Long qw(GetOptions);
  # Sorting strategies for one group of duplicate paths, keyed by the value
  # accepted by `--order`.  Every callback receives a list of paths and
  # returns those same paths in sorted order.
  my %order_callbacks = (

      # Plain string comparison of the complete paths.
      path => sub {
          my @paths = @_;
          return sort @paths;
      },

      # String comparison of the last path component only.  The basename of
      # each path is computed once up front instead of on every comparison.
      name => sub {
          my @paths = @_;
          my %base_of;
          $base_of{$_} = basename($_) for @paths;
          return sort { $base_of{$a} cmp $base_of{$b} } @paths;
      },

      # Numeric comparison of -M (script start time minus modification time,
      # in days), ascending -- so the most recently modified path comes first.
      time => sub {
          my @paths = @_;
          my %age_of;
          $age_of{$_} = -M $_ for @paths;
          return sort { $age_of{$a} <=> $age_of{$b} } @paths;
      },
  );
  # ---- Command-line handling ------------------------------------------------

  # Option variables; GetOptions() below fills them in.
  my $keep_first;         # -f / --first
  my $keep_last;          # -l / --last
  my $create_symlinks;    # -s / --symlink
  my $order_by = 'time';  # -o / --order
  my $min_size = 0;       # -m / --min-size

  # Parse the options BEFORE collecting the search paths.  GetOptions() removes
  # every option (and option value) it recognises from @ARGV, so whatever is
  # left afterwards can only be a path argument.  Collecting the paths first
  # would treat an option value such as the `time` in `-o time` as a search
  # path whenever a file or directory with that name happens to exist.
  GetOptions(
      'f|first!'           => \$keep_first,
      'l|last!'            => \$keep_last,
      's|symlink!'         => \$create_symlinks,
      'o|order|order-by=s' => \$order_by,
      'm|min-size=i'       => \$min_size,
  )
    or die("$0: error in command line arguments\n");

  # Reject an ordering for which no sort callback is registered.
  if (not exists $order_callbacks{$order_by}) {
      local $" = ", ";    # list separator used when interpolating the valid names
      die "$0: invalid value `$order_by` for `--order`: valid values are: @{[sort keys %order_callbacks]}\n";
  }

  # The remaining arguments are the places to scan.  Anything that is neither
  # a directory nor a regular file is silently dropped.
  my @dirs = grep { (-d $_) or (-f $_) } @ARGV;

  # Without at least one usable path there is nothing to do: show the usage.
  die <<"HELP" if !@dirs;
usage: $0 [options] /my/path [...]
Options:
-f, --first : keep only the first duplicated file
-l, --last : keep only the last duplicated file
-s, --symlink : replace duplicate files with symbolic links (with -f or -l)
-o, --order=type : order the results by: path, name or time
-m, --min-size=i : minimum size in bytes (default: 0)
HELP
  # find_duplicated_files { CALLBACK } @paths
  #
  # Walks every path in @paths, groups the regular (non-symlink) files whose
  # size is at least $min_size by size, and compares the files inside each
  # size group byte-for-byte.  For every set of identical files, CALLBACK is
  # invoked once with the set's paths, ordered by the `--order` callback
  # selected in $order_by.  Returns nothing.
  #
  # The (&@) prototype is what allows the `find_duplicated_files {...} @list`
  # call syntax; it must stay.
  sub find_duplicated_files (&@) {
      my $callback = shift;

      # Core module, loaded here because only this sub needs it.
      require Cwd;

      # size in bytes => [paths of the files having that size]
      my %files;
      find {
          no_chdir => 1,
          wanted   => sub {
              lstat;    # stat $_ without following symlinks; result reused via `_`
              (-f _) && (not -l _) && ((-s _) >= $min_size) && push @{$files{-s _}}, $_;
          }
      } => @_;

      foreach my $files (values %files) {
          next if $#{$files} < 1;    # a lone file of this size has no duplicate

          # Overlapping or repeated arguments (`fdf dir dir`, `fdf . ./sub`)
          # make File::Find report the very same file more than once.  Such an
          # entry compares equal to itself and would later be removed as a
          # "duplicate" of the file being kept -- i.e. the data would be lost.
          # Keep only the first path seen for each resolved absolute path.
          # Hard links have distinct paths and are therefore still reported.
          my %seen;
          @{$files} = grep {
              my $real = eval { Cwd::abs_path($_) };
              $real = $_ if !defined $real;    # unresolvable: fall back to the raw path
              !$seen{$real}++;
          } @{$files};
          next if $#{$files} < 1;

          # first file of a set => [the other files with identical content]
          my %dups;
          foreach my $i (0 .. $#{$files} - 1) {
              for (my $j = $i + 1 ; $j <= $#{$files} ; $j++) {

                  # compare() returns 0 only when both files have equal content.
                  if (compare($files->[$i], $files->[$j]) == 0) {

                      # Move the match out of the candidate list so it is not
                      # compared again; `$j--` re-examines the slot that the
                      # splice has just refilled.
                      push @{$dups{$files->[$i]}}, splice @{$files}, $j--, 1;
                  }
              }
          }

          while (my ($fparent, $fdups) = each %dups) {
              $callback->($order_callbacks{$order_by}($fparent, @{$fdups}));
          }
      }

      return;
  }
  # ---- Main: report every set of duplicates and optionally prune it ---------
  {
      # Core module, loaded here because only this block needs it.
      require File::Spec;

      # Make print() put each list element on its own line and terminate every
      # print with a newline -- only inside this block.
      local $, = "\n";
      local $\ = "\n";

      find_duplicated_files {
          my (@files) = @_;

          # One path per line, followed by a separator line.
          print @files, "-" x 80;

          # Pick the file to keep.  Without -f or -l this is a report-only run.
          my $main_file;
          if ($keep_first) {
              $main_file = shift(@files);
          }
          elsif ($keep_last) {
              $main_file = pop(@files);
          }
          else {
              return;
          }

          # A relative symlink target is resolved relative to the directory
          # that contains the link, not relative to the current directory.
          # Linking to the path exactly as it was found (e.g. `./sub/a`)
          # therefore creates a broken link unless the link lives in the
          # current directory, so the link always points to the absolute path.
          my $link_target = File::Spec->rel2abs($main_file);

          foreach my $file (@files) {

              # Never remove the file that is being kept, even if the same
              # path was reported twice.
              next if $file eq $main_file;

              print ":: Removing: `$file`";
              unlink($file) or do {
                  warn "error: can't delete file `$file': $!\n";
                  next;
              };

              if ($create_symlinks) {
                  print ":: Symlinking: `$main_file` <- `$file`";
                  symlink($link_target, $file) or do {
                      warn "error: can't create symbolic link for `$file': $!\n";
                      next;
                  };
              }
          }
      } @dirs;
  }